{'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 949202279808}], 'timestamp': '2025-10-01 04:12:41.433129', 'step': 0, 'epoch': 0} {'type': 'pplx', 'content': 21944.183071258598, 'timestamp': '2025-10-01 04:12:41.436328', 'step': 0, 'epoch': 0} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:12:41.507905', 'step': 0, 'epoch': 1} {'type': 'loss', 'content': 0.9957584738731384, 'timestamp': '2025-10-01 04:12:41.509779', 'step': 1, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:12:41.541712', 'step': 1, 'epoch': 1} {'type': 'loss', 'content': 0.9864388704299927, 'timestamp': '2025-10-01 04:12:41.543551', 'step': 2, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:12:41.589471', 'step': 2, 'epoch': 1} {'type': 'loss', 'content': 0.9595818519592285, 'timestamp': '2025-10-01 04:12:41.591374', 'step': 3, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:12:41.631358', 'step': 3, 'epoch': 1} {'type': 'loss', 'content': 0.9695097208023071, 'timestamp': '2025-10-01 04:12:41.691005', 'step': 4, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:12:41.732331', 'step': 4, 'epoch': 1} {'type': 'loss', 'content': 0.35812512040138245, 'timestamp': '2025-10-01 04:12:41.734318', 'step': 5, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:12:41.776054', 'step': 5, 'epoch': 1} {'type': 'loss', 'content': 0.3029557168483734, 'timestamp': '2025-10-01 04:12:41.777908', 'step': 6, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:12:41.808983', 'step': 6, 'epoch': 1} {'type': 'loss', 'content': 0.34162023663520813, 'timestamp': '2025-10-01 04:12:41.810846', 'step': 7, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:12:41.843213', 'step': 7, 'epoch': 1} {'type': 'loss', 'content': 0.3198191523551941, 'timestamp': '2025-10-01 04:12:41.867080', 'step': 8, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:12:41.899771', 'step': 8, 'epoch': 1} {'type': 'loss', 'content': 0.19930104911327362, 'timestamp': '2025-10-01 04:12:41.901807', 'step': 9, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:12:41.933171', 'step': 9, 'epoch': 1} {'type': 'loss', 'content': 0.2492242455482483, 'timestamp': '2025-10-01 04:12:41.936184', 'step': 10, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:12:41.978151', 'step': 10, 'epoch': 1} {'type': 'loss', 'content': 0.13022644817829132, 'timestamp': '2025-10-01 04:12:41.980052', 'step': 11, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:12:42.011031', 'step': 11, 'epoch': 1} {'type': 'loss', 'content': 0.26903241872787476, 'timestamp': '2025-10-01 04:12:42.034468', 'step': 12, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:12:42.065238', 'step': 12, 'epoch': 1} {'type': 'loss', 'content': 0.16075129806995392, 'timestamp': '2025-10-01 04:12:42.067054', 'step': 13, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:12:42.101228', 'step': 13, 'epoch': 1} {'type': 'loss', 'content': 0.17580145597457886, 'timestamp': '2025-10-01 04:12:42.103044', 'step': 14, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:12:42.133087', 'step': 14, 'epoch': 1} {'type': 'loss', 'content': 0.1836102455854416, 'timestamp': '2025-10-01 04:12:42.135009', 'step': 15, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:12:42.170097', 'step': 15, 'epoch': 1} {'type': 'loss', 'content': 0.2815609276294708, 'timestamp': '2025-10-01 04:12:42.193484', 'step': 16, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:12:42.226436', 'step': 16, 'epoch': 1} {'type': 'loss', 'content': 0.17742805182933807, 'timestamp': '2025-10-01 04:12:42.228267', 'step': 17, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:12:42.259021', 'step': 17, 'epoch': 1} {'type': 'loss', 'content': 0.24413292109966278, 'timestamp': '2025-10-01 04:12:42.260853', 'step': 18, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:12:42.308939', 'step': 18, 'epoch': 1} {'type': 'loss', 'content': 0.2981049418449402, 'timestamp': '2025-10-01 04:12:42.310934', 'step': 19, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:12:42.341766', 'step': 19, 'epoch': 1} {'type': 'loss', 'content': 0.14809507131576538, 'timestamp': '2025-10-01 04:12:42.365144', 'step': 20, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:12:42.401171', 'step': 20, 'epoch': 1} {'type': 'loss', 'content': 0.3227277994155884, 'timestamp': '2025-10-01 04:12:42.403081', 'step': 21, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:12:42.434671', 'step': 21, 'epoch': 1} {'type': 'loss', 'content': 0.2695126533508301, 'timestamp': '2025-10-01 04:12:42.436576', 'step': 22, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:12:42.467735', 'step': 22, 'epoch': 1} {'type': 'loss', 'content': 0.13543066382408142, 'timestamp': '2025-10-01 04:12:42.469874', 'step': 23, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:12:42.502306', 'step': 23, 'epoch': 1} {'type': 'loss', 'content': 0.3025766909122467, 'timestamp': '2025-10-01 04:12:42.526877', 'step': 24, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:12:42.557943', 'step': 24, 'epoch': 1} {'type': 'loss', 'content': 0.2079235464334488, 'timestamp': '2025-10-01 04:12:42.565203', 'step': 25, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:12:42.604921', 'step': 25, 'epoch': 1} {'type': 'loss', 'content': 0.20723873376846313, 'timestamp': '2025-10-01 04:12:42.606797', 'step': 26, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:12:42.636920', 'step': 26, 'epoch': 1} {'type': 'loss', 'content': 0.3800322711467743, 'timestamp': '2025-10-01 04:12:42.638607', 'step': 27, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:12:42.670568', 'step': 27, 'epoch': 1} {'type': 'loss', 'content': 0.2769865095615387, 'timestamp': '2025-10-01 04:12:42.694243', 'step': 28, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:12:42.725407', 'step': 28, 'epoch': 1} {'type': 'loss', 'content': 0.1703578233718872, 'timestamp': '2025-10-01 04:12:42.727405', 'step': 29, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:12:42.761782', 'step': 29, 'epoch': 1} {'type': 'loss', 'content': 0.11640524864196777, 'timestamp': '2025-10-01 04:12:42.763643', 'step': 30, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:12:42.796559', 'step': 30, 'epoch': 1} {'type': 'loss', 'content': 0.21496891975402832, 'timestamp': '2025-10-01 04:12:42.798933', 'step': 31, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:12:42.833060', 'step': 31, 'epoch': 1} {'type': 'loss', 'content': 0.17385248839855194, 'timestamp': '2025-10-01 04:12:42.856504', 'step': 32, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:12:42.895002', 'step': 32, 'epoch': 1} {'type': 'loss', 'content': 0.24292851984500885, 'timestamp': '2025-10-01 04:12:42.898435', 'step': 33, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:12:42.931918', 'step': 33, 'epoch': 1} {'type': 'loss', 'content': 0.25934305787086487, 'timestamp': '2025-10-01 04:12:42.933882', 'step': 34, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:12:42.966775', 'step': 34, 'epoch': 1} {'type': 'loss', 'content': 0.3366868793964386, 'timestamp': '2025-10-01 04:12:42.968842', 'step': 35, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:12:43.009423', 'step': 35, 'epoch': 1} {'type': 'loss', 'content': 0.1851024329662323, 'timestamp': '2025-10-01 04:12:43.036769', 'step': 36, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:12:43.067901', 'step': 36, 'epoch': 1} {'type': 'loss', 'content': 0.3264882564544678, 'timestamp': '2025-10-01 04:12:43.069929', 'step': 37, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:12:43.101366', 'step': 37, 'epoch': 1} {'type': 'loss', 'content': 0.19407038390636444, 'timestamp': '2025-10-01 04:12:43.104070', 'step': 38, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:12:43.137317', 'step': 38, 'epoch': 1} {'type': 'loss', 'content': 0.320141077041626, 'timestamp': '2025-10-01 04:12:43.139344', 'step': 39, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:12:43.172121', 'step': 39, 'epoch': 1} {'type': 'loss', 'content': 0.2575172483921051, 'timestamp': '2025-10-01 04:12:43.195824', 'step': 40, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:12:43.237132', 'step': 40, 'epoch': 1} {'type': 'loss', 'content': 0.15864115953445435, 'timestamp': '2025-10-01 04:12:43.238937', 'step': 41, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:12:43.288513', 'step': 41, 'epoch': 1} {'type': 'loss', 'content': 0.18750397861003876, 'timestamp': '2025-10-01 04:12:43.290423', 'step': 42, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:12:43.329060', 'step': 42, 'epoch': 1} {'type': 'loss', 'content': 0.1709277480840683, 'timestamp': '2025-10-01 04:12:43.331011', 'step': 43, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:12:43.363787', 'step': 43, 'epoch': 1} {'type': 'loss', 'content': 0.257757306098938, 'timestamp': '2025-10-01 04:12:43.387151', 'step': 44, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:12:43.419187', 'step': 44, 'epoch': 1} {'type': 'loss', 'content': 0.24028103053569794, 'timestamp': '2025-10-01 04:12:43.421199', 'step': 45, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:12:43.453517', 'step': 45, 'epoch': 1} {'type': 'loss', 'content': 0.2613568902015686, 'timestamp': '2025-10-01 04:12:43.455437', 'step': 46, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:12:43.487361', 'step': 46, 'epoch': 1} {'type': 'loss', 'content': 0.20126263797283173, 'timestamp': '2025-10-01 04:12:43.489826', 'step': 47, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:12:43.522208', 'step': 47, 'epoch': 1} {'type': 'loss', 'content': 0.309055894613266, 'timestamp': '2025-10-01 04:12:43.545534', 'step': 48, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:12:43.582643', 'step': 48, 'epoch': 1} {'type': 'loss', 'content': 0.18682987987995148, 'timestamp': '2025-10-01 04:12:43.584743', 'step': 49, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:12:43.617781', 'step': 49, 'epoch': 1} {'type': 'loss', 'content': 0.19826161861419678, 'timestamp': '2025-10-01 04:12:43.619646', 'step': 50, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:12:43.654176', 'step': 50, 'epoch': 1} {'type': 'loss', 'content': 0.24927449226379395, 'timestamp': '2025-10-01 04:12:43.656056', 'step': 51, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:12:43.690616', 'step': 51, 'epoch': 1} {'type': 'loss', 'content': 0.3308446705341339, 'timestamp': '2025-10-01 04:12:43.713977', 'step': 52, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:12:43.749922', 'step': 52, 'epoch': 1} {'type': 'loss', 'content': 0.1948421746492386, 'timestamp': '2025-10-01 04:12:43.751961', 'step': 53, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:12:43.785294', 'step': 53, 'epoch': 1} {'type': 'loss', 'content': 0.21074900031089783, 'timestamp': '2025-10-01 04:12:43.787639', 'step': 54, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:12:43.819513', 'step': 54, 'epoch': 1} {'type': 'loss', 'content': 0.24365384876728058, 'timestamp': '2025-10-01 04:12:43.823758', 'step': 55, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:12:43.873576', 'step': 55, 'epoch': 1} {'type': 'loss', 'content': 0.10974559932947159, 'timestamp': '2025-10-01 04:12:43.897060', 'step': 56, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:12:43.931850', 'step': 56, 'epoch': 1} {'type': 'loss', 'content': 0.17287789285182953, 'timestamp': '2025-10-01 04:12:43.933759', 'step': 57, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:12:43.974785', 'step': 57, 'epoch': 1} {'type': 'loss', 'content': 0.2076699286699295, 'timestamp': '2025-10-01 04:12:43.976577', 'step': 58, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:12:44.008006', 'step': 58, 'epoch': 1} {'type': 'loss', 'content': 0.22334714233875275, 'timestamp': '2025-10-01 04:12:44.009984', 'step': 59, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:12:44.042292', 'step': 59, 'epoch': 1} {'type': 'loss', 'content': 0.17362605035305023, 'timestamp': '2025-10-01 04:12:44.065869', 'step': 60, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:12:44.100293', 'step': 60, 'epoch': 1} {'type': 'loss', 'content': 0.20023909211158752, 'timestamp': '2025-10-01 04:12:44.109883', 'step': 61, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:12:44.141333', 'step': 61, 'epoch': 1} {'type': 'loss', 'content': 0.14632178843021393, 'timestamp': '2025-10-01 04:12:44.144690', 'step': 62, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:12:44.182687', 'step': 62, 'epoch': 1} {'type': 'loss', 'content': 0.24078743159770966, 'timestamp': '2025-10-01 04:12:44.188040', 'step': 63, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:12:44.224738', 'step': 63, 'epoch': 1} {'type': 'loss', 'content': 0.24959935247898102, 'timestamp': '2025-10-01 04:12:44.249078', 'step': 64, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:12:44.281783', 'step': 64, 'epoch': 1} {'type': 'loss', 'content': 0.12313212454319, 'timestamp': '2025-10-01 04:12:44.283749', 'step': 65, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:12:44.325721', 'step': 65, 'epoch': 1} {'type': 'loss', 'content': 0.2607284486293793, 'timestamp': '2025-10-01 04:12:44.327708', 'step': 66, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:12:44.360295', 'step': 66, 'epoch': 1} {'type': 'loss', 'content': 0.3011668622493744, 'timestamp': '2025-10-01 04:12:44.366294', 'step': 67, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:12:44.409152', 'step': 67, 'epoch': 1} {'type': 'loss', 'content': 0.19776707887649536, 'timestamp': '2025-10-01 04:12:44.432733', 'step': 68, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:12:44.470280', 'step': 68, 'epoch': 1} {'type': 'loss', 'content': 0.24154770374298096, 'timestamp': '2025-10-01 04:12:44.472376', 'step': 69, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:12:44.506348', 'step': 69, 'epoch': 1} {'type': 'loss', 'content': 0.2402488738298416, 'timestamp': '2025-10-01 04:12:44.510912', 'step': 70, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:12:44.541543', 'step': 70, 'epoch': 1} {'type': 'loss', 'content': 0.1569744050502777, 'timestamp': '2025-10-01 04:12:44.543406', 'step': 71, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:12:44.580387', 'step': 71, 'epoch': 1} {'type': 'loss', 'content': 0.20188656449317932, 'timestamp': '2025-10-01 04:12:44.603844', 'step': 72, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:12:44.636996', 'step': 72, 'epoch': 1} {'type': 'loss', 'content': 0.19568605720996857, 'timestamp': '2025-10-01 04:12:44.639757', 'step': 73, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:12:44.673202', 'step': 73, 'epoch': 1} {'type': 'loss', 'content': 0.20348317921161652, 'timestamp': '2025-10-01 04:12:44.675107', 'step': 74, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:12:44.711160', 'step': 74, 'epoch': 1} {'type': 'loss', 'content': 0.18747085332870483, 'timestamp': '2025-10-01 04:12:44.713018', 'step': 75, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:12:44.756447', 'step': 75, 'epoch': 1} {'type': 'loss', 'content': 0.3317852318286896, 'timestamp': '2025-10-01 04:12:44.783981', 'step': 76, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:12:44.831276', 'step': 76, 'epoch': 1} {'type': 'loss', 'content': 0.16544541716575623, 'timestamp': '2025-10-01 04:12:44.833169', 'step': 77, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:12:44.874773', 'step': 77, 'epoch': 1} {'type': 'loss', 'content': 0.21267008781433105, 'timestamp': '2025-10-01 04:12:44.876590', 'step': 78, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:12:44.910447', 'step': 78, 'epoch': 1} {'type': 'loss', 'content': 0.172340989112854, 'timestamp': '2025-10-01 04:12:44.916074', 'step': 79, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:12:44.960621', 'step': 79, 'epoch': 1} {'type': 'loss', 'content': 0.27010178565979004, 'timestamp': '2025-10-01 04:12:44.984006', 'step': 80, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:12:45.019157', 'step': 80, 'epoch': 1} {'type': 'loss', 'content': 0.2379448264837265, 'timestamp': '2025-10-01 04:12:45.021037', 'step': 81, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:12:45.053718', 'step': 81, 'epoch': 1} {'type': 'loss', 'content': 0.2024919092655182, 'timestamp': '2025-10-01 04:12:45.055680', 'step': 82, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:12:45.087910', 'step': 82, 'epoch': 1} {'type': 'loss', 'content': 0.1448671519756317, 'timestamp': '2025-10-01 04:12:45.089981', 'step': 83, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:12:45.121549', 'step': 83, 'epoch': 1} {'type': 'loss', 'content': 0.23656101524829865, 'timestamp': '2025-10-01 04:12:45.145142', 'step': 84, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:12:45.193363', 'step': 84, 'epoch': 1} {'type': 'loss', 'content': 0.1843407154083252, 'timestamp': '2025-10-01 04:12:45.195176', 'step': 85, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:12:45.228656', 'step': 85, 'epoch': 1} {'type': 'loss', 'content': 0.2061959058046341, 'timestamp': '2025-10-01 04:12:45.230502', 'step': 86, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:12:45.263533', 'step': 86, 'epoch': 1} {'type': 'loss', 'content': 0.21094831824302673, 'timestamp': '2025-10-01 04:12:45.265788', 'step': 87, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:12:45.298171', 'step': 87, 'epoch': 1} {'type': 'loss', 'content': 0.22415611147880554, 'timestamp': '2025-10-01 04:12:45.321700', 'step': 88, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:12:45.360505', 'step': 88, 'epoch': 1} {'type': 'loss', 'content': 0.1515646129846573, 'timestamp': '2025-10-01 04:12:45.362363', 'step': 89, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:12:45.398904', 'step': 89, 'epoch': 1} {'type': 'loss', 'content': 0.24966305494308472, 'timestamp': '2025-10-01 04:12:45.400890', 'step': 90, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:12:45.439436', 'step': 90, 'epoch': 1} {'type': 'loss', 'content': 0.18105532228946686, 'timestamp': '2025-10-01 04:12:45.441767', 'step': 91, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:12:45.478384', 'step': 91, 'epoch': 1} {'type': 'loss', 'content': 0.16004353761672974, 'timestamp': '2025-10-01 04:12:45.501986', 'step': 92, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:12:45.540131', 'step': 92, 'epoch': 1} {'type': 'loss', 'content': 0.24744060635566711, 'timestamp': '2025-10-01 04:12:45.542041', 'step': 93, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:12:45.575833', 'step': 93, 'epoch': 1} {'type': 'loss', 'content': 0.2201777845621109, 'timestamp': '2025-10-01 04:12:45.578515', 'step': 94, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:12:45.609724', 'step': 94, 'epoch': 1} {'type': 'loss', 'content': 0.16145789623260498, 'timestamp': '2025-10-01 04:12:45.611379', 'step': 95, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:12:45.644032', 'step': 95, 'epoch': 1} {'type': 'loss', 'content': 0.2170163244009018, 'timestamp': '2025-10-01 04:12:45.668215', 'step': 96, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:12:45.706514', 'step': 96, 'epoch': 1} {'type': 'loss', 'content': 0.2033218890428543, 'timestamp': '2025-10-01 04:12:45.708500', 'step': 97, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:12:45.740211', 'step': 97, 'epoch': 1} {'type': 'loss', 'content': 0.22810523211956024, 'timestamp': '2025-10-01 04:12:45.742192', 'step': 98, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:12:45.775579', 'step': 98, 'epoch': 1} {'type': 'loss', 'content': 0.15735995769500732, 'timestamp': '2025-10-01 04:12:45.782755', 'step': 99, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:12:45.815074', 'step': 99, 'epoch': 1} {'type': 'loss', 'content': 0.25398969650268555, 'timestamp': '2025-10-01 04:12:45.838276', 'step': 100, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:12:45.872995', 'step': 100, 'epoch': 1} {'type': 'loss', 'content': 0.24306602776050568, 'timestamp': '2025-10-01 04:12:45.874642', 'step': 101, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:12:45.909747', 'step': 101, 'epoch': 1} {'type': 'loss', 'content': 0.23865793645381927, 'timestamp': '2025-10-01 04:12:45.911593', 'step': 102, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:12:45.950272', 'step': 102, 'epoch': 1} {'type': 'loss', 'content': 0.3240271508693695, 'timestamp': '2025-10-01 04:12:45.952250', 'step': 103, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:12:45.986550', 'step': 103, 'epoch': 1} {'type': 'loss', 'content': 0.2085960954427719, 'timestamp': '2025-10-01 04:12:46.018848', 'step': 104, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:12:46.049290', 'step': 104, 'epoch': 1} {'type': 'loss', 'content': 0.17581288516521454, 'timestamp': '2025-10-01 04:12:46.051215', 'step': 105, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:12:46.082370', 'step': 105, 'epoch': 1} {'type': 'loss', 'content': 0.2440214455127716, 'timestamp': '2025-10-01 04:12:46.084288', 'step': 106, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:12:46.120300', 'step': 106, 'epoch': 1} {'type': 'loss', 'content': 0.33360499143600464, 'timestamp': '2025-10-01 04:12:46.122241', 'step': 107, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:12:46.156223', 'step': 107, 'epoch': 1} {'type': 'loss', 'content': 0.32894593477249146, 'timestamp': '2025-10-01 04:12:46.179636', 'step': 108, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:12:46.211270', 'step': 108, 'epoch': 1} {'type': 'loss', 'content': 0.19687573611736298, 'timestamp': '2025-10-01 04:12:46.213326', 'step': 109, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:12:46.244551', 'step': 109, 'epoch': 1} {'type': 'loss', 'content': 0.2679350972175598, 'timestamp': '2025-10-01 04:12:46.246422', 'step': 110, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:12:46.278680', 'step': 110, 'epoch': 1} {'type': 'loss', 'content': 0.1552353799343109, 'timestamp': '2025-10-01 04:12:46.280612', 'step': 111, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:12:46.321133', 'step': 111, 'epoch': 1} {'type': 'loss', 'content': 0.17297890782356262, 'timestamp': '2025-10-01 04:12:46.344596', 'step': 112, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:12:46.376183', 'step': 112, 'epoch': 1} {'type': 'loss', 'content': 0.20369455218315125, 'timestamp': '2025-10-01 04:12:46.378206', 'step': 113, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:12:46.410443', 'step': 113, 'epoch': 1} {'type': 'loss', 'content': 0.3367583751678467, 'timestamp': '2025-10-01 04:12:46.412251', 'step': 114, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:12:46.444551', 'step': 114, 'epoch': 1} {'type': 'loss', 'content': 0.1795763373374939, 'timestamp': '2025-10-01 04:12:46.446437', 'step': 115, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:12:46.478325', 'step': 115, 'epoch': 1} {'type': 'loss', 'content': 0.2636714577674866, 'timestamp': '2025-10-01 04:12:46.502802', 'step': 116, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:12:46.534197', 'step': 116, 'epoch': 1} {'type': 'loss', 'content': 0.19473238289356232, 'timestamp': '2025-10-01 04:12:46.536282', 'step': 117, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:12:46.566723', 'step': 117, 'epoch': 1} {'type': 'loss', 'content': 0.21832753717899323, 'timestamp': '2025-10-01 04:12:46.568860', 'step': 118, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:12:46.599055', 'step': 118, 'epoch': 1} {'type': 'loss', 'content': 0.22856837511062622, 'timestamp': '2025-10-01 04:12:46.601415', 'step': 119, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:12:46.644662', 'step': 119, 'epoch': 1} {'type': 'loss', 'content': 0.26403549313545227, 'timestamp': '2025-10-01 04:12:46.668049', 'step': 120, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:12:46.700371', 'step': 120, 'epoch': 1} {'type': 'loss', 'content': 0.2411830872297287, 'timestamp': '2025-10-01 04:12:46.702781', 'step': 121, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:12:46.745261', 'step': 121, 'epoch': 1} {'type': 'loss', 'content': 0.18624882400035858, 'timestamp': '2025-10-01 04:12:46.747929', 'step': 122, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:12:46.788944', 'step': 122, 'epoch': 1} {'type': 'loss', 'content': 0.2877928614616394, 'timestamp': '2025-10-01 04:12:46.790665', 'step': 123, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:12:46.826713', 'step': 123, 'epoch': 1} {'type': 'loss', 'content': 0.18232882022857666, 'timestamp': '2025-10-01 04:12:46.852414', 'step': 124, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:12:46.891000', 'step': 124, 'epoch': 1} {'type': 'loss', 'content': 0.21831537783145905, 'timestamp': '2025-10-01 04:12:46.892781', 'step': 125, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:12:46.924132', 'step': 125, 'epoch': 1} {'type': 'loss', 'content': 0.17082718014717102, 'timestamp': '2025-10-01 04:12:46.926132', 'step': 126, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:12:46.958023', 'step': 126, 'epoch': 1} {'type': 'loss', 'content': 0.19980834424495697, 'timestamp': '2025-10-01 04:12:46.960069', 'step': 127, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:12:46.993008', 'step': 127, 'epoch': 1} {'type': 'loss', 'content': 0.37288469076156616, 'timestamp': '2025-10-01 04:12:47.016590', 'step': 128, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:12:47.050393', 'step': 128, 'epoch': 1} {'type': 'loss', 'content': 0.20850887894630432, 'timestamp': '2025-10-01 04:12:47.053057', 'step': 129, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:12:47.087113', 'step': 129, 'epoch': 1} {'type': 'loss', 'content': 0.19245868921279907, 'timestamp': '2025-10-01 04:12:47.089188', 'step': 130, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:12:47.127013', 'step': 130, 'epoch': 1} {'type': 'loss', 'content': 0.13801676034927368, 'timestamp': '2025-10-01 04:12:47.128635', 'step': 131, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:12:47.161648', 'step': 131, 'epoch': 1} {'type': 'loss', 'content': 0.19924069941043854, 'timestamp': '2025-10-01 04:12:47.184854', 'step': 132, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:12:47.215454', 'step': 132, 'epoch': 1} {'type': 'loss', 'content': 0.2599528133869171, 'timestamp': '2025-10-01 04:12:47.217138', 'step': 133, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:12:47.248273', 'step': 133, 'epoch': 1} {'type': 'loss', 'content': 0.3023642599582672, 'timestamp': '2025-10-01 04:12:47.250037', 'step': 134, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:12:47.283987', 'step': 134, 'epoch': 1} {'type': 'loss', 'content': 0.1649545282125473, 'timestamp': '2025-10-01 04:12:47.285555', 'step': 135, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:12:47.320949', 'step': 135, 'epoch': 1} {'type': 'loss', 'content': 0.19373080134391785, 'timestamp': '2025-10-01 04:12:47.344233', 'step': 136, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:12:47.380983', 'step': 136, 'epoch': 1} {'type': 'loss', 'content': 0.28428635001182556, 'timestamp': '2025-10-01 04:12:47.382428', 'step': 137, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:12:47.413781', 'step': 137, 'epoch': 1} {'type': 'loss', 'content': 0.2128136157989502, 'timestamp': '2025-10-01 04:12:47.415404', 'step': 138, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:12:47.445954', 'step': 138, 'epoch': 1} {'type': 'loss', 'content': 0.18205267190933228, 'timestamp': '2025-10-01 04:12:47.447776', 'step': 139, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:12:47.480287', 'step': 139, 'epoch': 1} {'type': 'loss', 'content': 0.3045111298561096, 'timestamp': '2025-10-01 04:12:47.503652', 'step': 140, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:12:47.552255', 'step': 140, 'epoch': 1} {'type': 'loss', 'content': 0.21808040142059326, 'timestamp': '2025-10-01 04:12:47.554206', 'step': 141, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:12:47.586743', 'step': 141, 'epoch': 1} {'type': 'loss', 'content': 0.19069109857082367, 'timestamp': '2025-10-01 04:12:47.589138', 'step': 142, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:12:47.627019', 'step': 142, 'epoch': 1} {'type': 'loss', 'content': 0.21833737194538116, 'timestamp': '2025-10-01 04:12:47.628913', 'step': 143, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:12:47.674158', 'step': 143, 'epoch': 1} {'type': 'loss', 'content': 0.2467607855796814, 'timestamp': '2025-10-01 04:12:47.697672', 'step': 144, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:12:47.735432', 'step': 144, 'epoch': 1} {'type': 'loss', 'content': 0.16725178062915802, 'timestamp': '2025-10-01 04:12:47.737747', 'step': 145, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:12:47.770424', 'step': 145, 'epoch': 1} {'type': 'loss', 'content': 0.27057144045829773, 'timestamp': '2025-10-01 04:12:47.772422', 'step': 146, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:12:47.803326', 'step': 146, 'epoch': 1} {'type': 'loss', 'content': 0.23226702213287354, 'timestamp': '2025-10-01 04:12:47.805572', 'step': 147, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:12:47.839071', 'step': 147, 'epoch': 1} {'type': 'loss', 'content': 0.20416055619716644, 'timestamp': '2025-10-01 04:12:47.862751', 'step': 148, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:12:47.895946', 'step': 148, 'epoch': 1} {'type': 'loss', 'content': 0.12781955301761627, 'timestamp': '2025-10-01 04:12:47.897920', 'step': 149, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:12:47.930567', 'step': 149, 'epoch': 1} {'type': 'loss', 'content': 0.2210591435432434, 'timestamp': '2025-10-01 04:12:47.941056', 'step': 150, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:12:47.972178', 'step': 150, 'epoch': 1} {'type': 'loss', 'content': 0.15205244719982147, 'timestamp': '2025-10-01 04:12:47.974354', 'step': 151, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:12:48.005716', 'step': 151, 'epoch': 1} {'type': 'loss', 'content': 0.17188908159732819, 'timestamp': '2025-10-01 04:12:48.029437', 'step': 152, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:12:48.066172', 'step': 152, 'epoch': 1} {'type': 'loss', 'content': 0.23950615525245667, 'timestamp': '2025-10-01 04:12:48.068149', 'step': 153, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:12:48.100418', 'step': 153, 'epoch': 1} {'type': 'loss', 'content': 0.24122919142246246, 'timestamp': '2025-10-01 04:12:48.103088', 'step': 154, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:12:48.152628', 'step': 154, 'epoch': 1} {'type': 'loss', 'content': 0.3409198522567749, 'timestamp': '2025-10-01 04:12:48.159345', 'step': 155, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:12:48.190948', 'step': 155, 'epoch': 1} {'type': 'loss', 'content': 0.2347172349691391, 'timestamp': '2025-10-01 04:12:48.215895', 'step': 156, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:12:48.249723', 'step': 156, 'epoch': 1} {'type': 'loss', 'content': 0.15738853812217712, 'timestamp': '2025-10-01 04:12:48.252027', 'step': 157, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:12:48.285290', 'step': 157, 'epoch': 1} {'type': 'loss', 'content': 0.22714191675186157, 'timestamp': '2025-10-01 04:12:48.296094', 'step': 158, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:12:48.334529', 'step': 158, 'epoch': 1} {'type': 'loss', 'content': 0.16903601586818695, 'timestamp': '2025-10-01 04:12:48.336427', 'step': 159, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:12:48.369310', 'step': 159, 'epoch': 1} {'type': 'loss', 'content': 0.21087554097175598, 'timestamp': '2025-10-01 04:12:48.394137', 'step': 160, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:12:48.426184', 'step': 160, 'epoch': 1} {'type': 'loss', 'content': 0.21975824236869812, 'timestamp': '2025-10-01 04:12:48.429094', 'step': 161, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:12:48.462994', 'step': 161, 'epoch': 1} {'type': 'loss', 'content': 0.3059465289115906, 'timestamp': '2025-10-01 04:12:48.464982', 'step': 162, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:12:48.506901', 'step': 162, 'epoch': 1} {'type': 'loss', 'content': 0.19918270409107208, 'timestamp': '2025-10-01 04:12:48.509190', 'step': 163, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:12:48.547249', 'step': 163, 'epoch': 1} {'type': 'loss', 'content': 0.17958161234855652, 'timestamp': '2025-10-01 04:12:48.570665', 'step': 164, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:12:48.608174', 'step': 164, 'epoch': 1} {'type': 'loss', 'content': 0.27877652645111084, 'timestamp': '2025-10-01 04:12:48.610132', 'step': 165, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:12:48.651089', 'step': 165, 'epoch': 1} {'type': 'loss', 'content': 0.16488942503929138, 'timestamp': '2025-10-01 04:12:48.653480', 'step': 166, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:12:48.689564', 'step': 166, 'epoch': 1} {'type': 'loss', 'content': 0.121433325111866, 'timestamp': '2025-10-01 04:12:48.692164', 'step': 167, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:12:48.723638', 'step': 167, 'epoch': 1} {'type': 'loss', 'content': 0.2016034871339798, 'timestamp': '2025-10-01 04:12:48.746968', 'step': 168, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:12:48.777349', 'step': 168, 'epoch': 1} {'type': 'loss', 'content': 0.1484135389328003, 'timestamp': '2025-10-01 04:12:48.779253', 'step': 169, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:12:48.809645', 'step': 169, 'epoch': 1} {'type': 'loss', 'content': 0.18458709120750427, 'timestamp': '2025-10-01 04:12:48.811752', 'step': 170, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:12:48.844032', 'step': 170, 'epoch': 1} {'type': 'loss', 'content': 0.15566177666187286, 'timestamp': '2025-10-01 04:12:48.846228', 'step': 171, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:12:48.876911', 'step': 171, 'epoch': 1} {'type': 'loss', 'content': 0.2882324457168579, 'timestamp': '2025-10-01 04:12:48.900495', 'step': 172, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:12:48.935011', 'step': 172, 'epoch': 1} {'type': 'loss', 'content': 0.2651435136795044, 'timestamp': '2025-10-01 04:12:48.937356', 'step': 173, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:12:48.974236', 'step': 173, 'epoch': 1} {'type': 'loss', 'content': 0.22531446814537048, 'timestamp': '2025-10-01 04:12:48.976642', 'step': 174, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:12:49.009538', 'step': 174, 'epoch': 1} {'type': 'loss', 'content': 0.279746949672699, 'timestamp': '2025-10-01 04:12:49.011439', 'step': 175, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:12:49.050475', 'step': 175, 'epoch': 1} {'type': 'loss', 'content': 0.24840769171714783, 'timestamp': '2025-10-01 04:12:49.073771', 'step': 176, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:12:49.105341', 'step': 176, 'epoch': 1} {'type': 'loss', 'content': 0.2575526535511017, 'timestamp': '2025-10-01 04:12:49.106936', 'step': 177, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:12:49.137784', 'step': 177, 'epoch': 1} {'type': 'loss', 'content': 0.15493883192539215, 'timestamp': '2025-10-01 04:12:49.139732', 'step': 178, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:12:49.170714', 'step': 178, 'epoch': 1} {'type': 'loss', 'content': 0.2320641428232193, 'timestamp': '2025-10-01 04:12:49.172494', 'step': 179, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:12:49.205596', 'step': 179, 'epoch': 1} {'type': 'loss', 'content': 0.18650174140930176, 'timestamp': '2025-10-01 04:12:49.228792', 'step': 180, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:12:49.259904', 'step': 180, 'epoch': 1} {'type': 'loss', 'content': 0.25233662128448486, 'timestamp': '2025-10-01 04:12:49.261835', 'step': 181, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:12:49.293321', 'step': 181, 'epoch': 1} {'type': 'loss', 'content': 0.17250217497348785, 'timestamp': '2025-10-01 04:12:49.295130', 'step': 182, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:12:49.330612', 'step': 182, 'epoch': 1} {'type': 'loss', 'content': 0.21325469017028809, 'timestamp': '2025-10-01 04:12:49.332309', 'step': 183, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:12:49.363695', 'step': 183, 'epoch': 1} {'type': 'loss', 'content': 0.1656583696603775, 'timestamp': '2025-10-01 04:12:49.387042', 'step': 184, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:12:49.422121', 'step': 184, 'epoch': 1} {'type': 'loss', 'content': 0.19827085733413696, 'timestamp': '2025-10-01 04:12:49.424143', 'step': 185, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:12:49.461794', 'step': 185, 'epoch': 1} {'type': 'loss', 'content': 0.1442987024784088, 'timestamp': '2025-10-01 04:12:49.463810', 'step': 186, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:12:49.498194', 'step': 186, 'epoch': 1} {'type': 'loss', 'content': 0.17165014147758484, 'timestamp': '2025-10-01 04:12:49.500241', 'step': 187, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:12:49.537494', 'step': 187, 'epoch': 1} {'type': 'loss', 'content': 0.2077910304069519, 'timestamp': '2025-10-01 04:12:49.561008', 'step': 188, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:12:49.597033', 'step': 188, 'epoch': 1} {'type': 'loss', 'content': 0.26125195622444153, 'timestamp': '2025-10-01 04:12:49.599051', 'step': 189, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:12:49.632204', 'step': 189, 'epoch': 1} {'type': 'loss', 'content': 0.3333583176136017, 'timestamp': '2025-10-01 04:12:49.634637', 'step': 190, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:12:49.672727', 'step': 190, 'epoch': 1} {'type': 'loss', 'content': 0.14477846026420593, 'timestamp': '2025-10-01 04:12:49.674294', 'step': 191, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:12:49.706156', 'step': 191, 'epoch': 1} {'type': 'loss', 'content': 0.3398745357990265, 'timestamp': '2025-10-01 04:12:49.729461', 'step': 192, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:12:49.775248', 'step': 192, 'epoch': 1} {'type': 'loss', 'content': 0.22810649871826172, 'timestamp': '2025-10-01 04:12:49.777181', 'step': 193, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:12:49.810690', 'step': 193, 'epoch': 1} {'type': 'loss', 'content': 0.24153751134872437, 'timestamp': '2025-10-01 04:12:49.812309', 'step': 194, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:12:49.843692', 'step': 194, 'epoch': 1} {'type': 'loss', 'content': 0.19194085896015167, 'timestamp': '2025-10-01 04:12:49.846080', 'step': 195, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:12:49.878753', 'step': 195, 'epoch': 1} {'type': 'loss', 'content': 0.22208552062511444, 'timestamp': '2025-10-01 04:12:49.902239', 'step': 196, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:12:49.934228', 'step': 196, 'epoch': 1} {'type': 'loss', 'content': 0.20632465183734894, 'timestamp': '2025-10-01 04:12:49.936192', 'step': 197, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:12:49.977025', 'step': 197, 'epoch': 1} {'type': 'loss', 'content': 0.328914076089859, 'timestamp': '2025-10-01 04:12:49.978948', 'step': 198, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:12:50.019243', 'step': 198, 'epoch': 1} {'type': 'loss', 'content': 0.1318514347076416, 'timestamp': '2025-10-01 04:12:50.021153', 'step': 199, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:12:50.056403', 'step': 199, 'epoch': 1} {'type': 'loss', 'content': 0.1270432472229004, 'timestamp': '2025-10-01 04:12:50.079855', 'step': 200, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:12:50.112145', 'step': 200, 'epoch': 1} {'type': 'loss', 'content': 0.35408857464790344, 'timestamp': '2025-10-01 04:12:50.113982', 'step': 201, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:12:50.147521', 'step': 201, 'epoch': 1} {'type': 'loss', 'content': 0.18257048726081848, 'timestamp': '2025-10-01 04:12:50.149158', 'step': 202, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:12:50.183104', 'step': 202, 'epoch': 1} {'type': 'loss', 'content': 0.1371033489704132, 'timestamp': '2025-10-01 04:12:50.185523', 'step': 203, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:12:50.215865', 'step': 203, 'epoch': 1} {'type': 'loss', 'content': 0.2449110597372055, 'timestamp': '2025-10-01 04:12:50.239055', 'step': 204, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:12:50.271992', 'step': 204, 'epoch': 1} {'type': 'loss', 'content': 0.3193829357624054, 'timestamp': '2025-10-01 04:12:50.273942', 'step': 205, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:12:50.305154', 'step': 205, 'epoch': 1} {'type': 'loss', 'content': 0.15918336808681488, 'timestamp': '2025-10-01 04:12:50.307056', 'step': 206, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:12:50.340265', 'step': 206, 'epoch': 1} {'type': 'loss', 'content': 0.18967324495315552, 'timestamp': '2025-10-01 04:12:50.342188', 'step': 207, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:12:50.374726', 'step': 207, 'epoch': 1} {'type': 'loss', 'content': 0.16560398042201996, 'timestamp': '2025-10-01 04:12:50.398223', 'step': 208, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:12:50.437548', 'step': 208, 'epoch': 1} {'type': 'loss', 'content': 0.20276930928230286, 'timestamp': '2025-10-01 04:12:50.439472', 'step': 209, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:12:50.474179', 'step': 209, 'epoch': 1} {'type': 'loss', 'content': 0.3339516818523407, 'timestamp': '2025-10-01 04:12:50.476014', 'step': 210, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:12:50.506892', 'step': 210, 'epoch': 1} {'type': 'loss', 'content': 0.1681528091430664, 'timestamp': '2025-10-01 04:12:50.510410', 'step': 211, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:12:50.545965', 'step': 211, 'epoch': 1} {'type': 'loss', 'content': 0.25764399766921997, 'timestamp': '2025-10-01 04:12:50.571467', 'step': 212, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:12:50.609707', 'step': 212, 'epoch': 1} {'type': 'loss', 'content': 0.15797767043113708, 'timestamp': '2025-10-01 04:12:50.611511', 'step': 213, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:12:50.642622', 'step': 213, 'epoch': 1} {'type': 'loss', 'content': 0.28287553787231445, 'timestamp': '2025-10-01 04:12:50.644538', 'step': 214, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:12:50.676615', 'step': 214, 'epoch': 1} {'type': 'loss', 'content': 0.1753864586353302, 'timestamp': '2025-10-01 04:12:50.678419', 'step': 215, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:12:50.722964', 'step': 215, 'epoch': 1} {'type': 'loss', 'content': 0.302380234003067, 'timestamp': '2025-10-01 04:12:50.746143', 'step': 216, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:12:50.778891', 'step': 216, 'epoch': 1} {'type': 'loss', 'content': 0.21359509229660034, 'timestamp': '2025-10-01 04:12:50.780528', 'step': 217, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:12:50.814717', 'step': 217, 'epoch': 1} {'type': 'loss', 'content': 0.2068203091621399, 'timestamp': '2025-10-01 04:12:50.816966', 'step': 218, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:12:50.854742', 'step': 218, 'epoch': 1} {'type': 'loss', 'content': 0.3217077851295471, 'timestamp': '2025-10-01 04:12:50.856712', 'step': 219, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:12:50.898255', 'step': 219, 'epoch': 1} {'type': 'loss', 'content': 0.26383572816848755, 'timestamp': '2025-10-01 04:12:50.921635', 'step': 220, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:12:50.954713', 'step': 220, 'epoch': 1} {'type': 'loss', 'content': 0.19933989644050598, 'timestamp': '2025-10-01 04:12:50.956747', 'step': 221, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:12:50.990292', 'step': 221, 'epoch': 1} {'type': 'loss', 'content': 0.12683749198913574, 'timestamp': '2025-10-01 04:12:50.998751', 'step': 222, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:12:51.030054', 'step': 222, 'epoch': 1} {'type': 'loss', 'content': 0.22380684316158295, 'timestamp': '2025-10-01 04:12:51.031779', 'step': 223, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:12:51.061721', 'step': 223, 'epoch': 1} {'type': 'loss', 'content': 0.19793038070201874, 'timestamp': '2025-10-01 04:12:51.084834', 'step': 224, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:12:51.131341', 'step': 224, 'epoch': 1} {'type': 'loss', 'content': 0.175076425075531, 'timestamp': '2025-10-01 04:12:51.133014', 'step': 225, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:12:51.172073', 'step': 225, 'epoch': 1} {'type': 'loss', 'content': 0.2323109656572342, 'timestamp': '2025-10-01 04:12:51.173767', 'step': 226, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:12:51.216191', 'step': 226, 'epoch': 1} {'type': 'loss', 'content': 0.22772331535816193, 'timestamp': '2025-10-01 04:12:51.217917', 'step': 227, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:12:51.248740', 'step': 227, 'epoch': 1} {'type': 'loss', 'content': 0.20947390794754028, 'timestamp': '2025-10-01 04:12:51.272079', 'step': 228, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:12:51.306062', 'step': 228, 'epoch': 1} {'type': 'loss', 'content': 0.1998039186000824, 'timestamp': '2025-10-01 04:12:51.307982', 'step': 229, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:12:51.341250', 'step': 229, 'epoch': 1} {'type': 'loss', 'content': 0.2567136883735657, 'timestamp': '2025-10-01 04:12:51.344097', 'step': 230, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:12:51.377097', 'step': 230, 'epoch': 1} {'type': 'loss', 'content': 0.1701449751853943, 'timestamp': '2025-10-01 04:12:51.379867', 'step': 231, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:12:51.413601', 'step': 231, 'epoch': 1} {'type': 'loss', 'content': 0.2908957004547119, 'timestamp': '2025-10-01 04:12:51.437149', 'step': 232, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:12:51.469632', 'step': 232, 'epoch': 1} {'type': 'loss', 'content': 0.24319903552532196, 'timestamp': '2025-10-01 04:12:51.471300', 'step': 233, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:12:51.510042', 'step': 233, 'epoch': 1} {'type': 'loss', 'content': 0.18412528932094574, 'timestamp': '2025-10-01 04:12:51.511986', 'step': 234, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:12:51.547230', 'step': 234, 'epoch': 1} {'type': 'loss', 'content': 0.2501911222934723, 'timestamp': '2025-10-01 04:12:51.555367', 'step': 235, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:12:51.589767', 'step': 235, 'epoch': 1} {'type': 'loss', 'content': 0.131170853972435, 'timestamp': '2025-10-01 04:12:51.613474', 'step': 236, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:12:51.645761', 'step': 236, 'epoch': 1} {'type': 'loss', 'content': 0.26105713844299316, 'timestamp': '2025-10-01 04:12:51.647756', 'step': 237, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:12:51.679698', 'step': 237, 'epoch': 1} {'type': 'loss', 'content': 0.27991750836372375, 'timestamp': '2025-10-01 04:12:51.681548', 'step': 238, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:12:51.713900', 'step': 238, 'epoch': 1} {'type': 'loss', 'content': 0.1682296246290207, 'timestamp': '2025-10-01 04:12:51.715815', 'step': 239, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:12:51.747608', 'step': 239, 'epoch': 1} {'type': 'loss', 'content': 0.20278048515319824, 'timestamp': '2025-10-01 04:12:51.770988', 'step': 240, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:12:51.802346', 'step': 240, 'epoch': 1} {'type': 'loss', 'content': 0.2229943871498108, 'timestamp': '2025-10-01 04:12:51.804387', 'step': 241, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:12:51.836526', 'step': 241, 'epoch': 1} {'type': 'loss', 'content': 0.20784319937229156, 'timestamp': '2025-10-01 04:12:51.838476', 'step': 242, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:12:51.871572', 'step': 242, 'epoch': 1} {'type': 'loss', 'content': 0.36373284459114075, 'timestamp': '2025-10-01 04:12:51.873648', 'step': 243, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:12:51.912406', 'step': 243, 'epoch': 1} {'type': 'loss', 'content': 0.21279402077198029, 'timestamp': '2025-10-01 04:12:51.936159', 'step': 244, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:12:51.969560', 'step': 244, 'epoch': 1} {'type': 'loss', 'content': 0.2853395938873291, 'timestamp': '2025-10-01 04:12:51.971578', 'step': 245, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:12:52.004950', 'step': 245, 'epoch': 1} {'type': 'loss', 'content': 0.18716487288475037, 'timestamp': '2025-10-01 04:12:52.007148', 'step': 246, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:12:52.039763', 'step': 246, 'epoch': 1} {'type': 'loss', 'content': 0.14760950207710266, 'timestamp': '2025-10-01 04:12:52.041609', 'step': 247, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:12:52.076065', 'step': 247, 'epoch': 1} {'type': 'loss', 'content': 0.1149551048874855, 'timestamp': '2025-10-01 04:12:52.099473', 'step': 248, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:12:52.133248', 'step': 248, 'epoch': 1} {'type': 'loss', 'content': 0.24042412638664246, 'timestamp': '2025-10-01 04:12:52.135406', 'step': 249, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:12:52.174012', 'step': 249, 'epoch': 1} {'type': 'loss', 'content': 0.1986067295074463, 'timestamp': '2025-10-01 04:12:52.175898', 'step': 250, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:12:52.214711', 'step': 250, 'epoch': 1} {'type': 'loss', 'content': 0.2050861120223999, 'timestamp': '2025-10-01 04:12:52.216749', 'step': 251, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:12:52.249400', 'step': 251, 'epoch': 1} {'type': 'loss', 'content': 0.19675154983997345, 'timestamp': '2025-10-01 04:12:52.272819', 'step': 252, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:12:52.309598', 'step': 252, 'epoch': 1} {'type': 'loss', 'content': 0.23571771383285522, 'timestamp': '2025-10-01 04:12:52.311469', 'step': 253, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:12:52.344599', 'step': 253, 'epoch': 1} {'type': 'loss', 'content': 0.18743591010570526, 'timestamp': '2025-10-01 04:12:52.346517', 'step': 254, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:12:52.385353', 'step': 254, 'epoch': 1} {'type': 'loss', 'content': 0.131078839302063, 'timestamp': '2025-10-01 04:12:52.387260', 'step': 255, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:12:52.418326', 'step': 255, 'epoch': 1} {'type': 'loss', 'content': 0.13164398074150085, 'timestamp': '2025-10-01 04:12:52.444789', 'step': 256, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:12:52.475451', 'step': 256, 'epoch': 1} {'type': 'loss', 'content': 0.2511514723300934, 'timestamp': '2025-10-01 04:12:52.477319', 'step': 257, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:12:52.508072', 'step': 257, 'epoch': 1} {'type': 'loss', 'content': 0.14766712486743927, 'timestamp': '2025-10-01 04:12:52.509977', 'step': 258, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:12:52.548724', 'step': 258, 'epoch': 1} {'type': 'loss', 'content': 0.12184775620698929, 'timestamp': '2025-10-01 04:12:52.550859', 'step': 259, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:12:52.581967', 'step': 259, 'epoch': 1} {'type': 'loss', 'content': 0.24833475053310394, 'timestamp': '2025-10-01 04:12:52.605568', 'step': 260, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:12:52.638978', 'step': 260, 'epoch': 1} {'type': 'loss', 'content': 0.16800206899642944, 'timestamp': '2025-10-01 04:12:52.640886', 'step': 261, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:12:52.673239', 'step': 261, 'epoch': 1} {'type': 'loss', 'content': 0.12669053673744202, 'timestamp': '2025-10-01 04:12:52.675173', 'step': 262, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:12:52.713690', 'step': 262, 'epoch': 1} {'type': 'loss', 'content': 0.21353617310523987, 'timestamp': '2025-10-01 04:12:52.715743', 'step': 263, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-10-01 04:12:52.766966', 'step': 263, 'epoch': 1} {'type': 'loss', 'content': 0.24512089788913727, 'timestamp': '2025-10-01 04:12:52.792101', 'step': 264, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:12:52.835027', 'step': 264, 'epoch': 1} {'type': 'loss', 'content': 0.22294481098651886, 'timestamp': '2025-10-01 04:12:52.836876', 'step': 265, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:12:52.873362', 'step': 265, 'epoch': 1} {'type': 'loss', 'content': 0.135482057929039, 'timestamp': '2025-10-01 04:12:52.875215', 'step': 266, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:12:52.907149', 'step': 266, 'epoch': 1} {'type': 'loss', 'content': 0.13929764926433563, 'timestamp': '2025-10-01 04:12:52.909150', 'step': 267, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:12:52.942097', 'step': 267, 'epoch': 1} {'type': 'loss', 'content': 0.3051658272743225, 'timestamp': '2025-10-01 04:12:52.965470', 'step': 268, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:12:52.997070', 'step': 268, 'epoch': 1} {'type': 'loss', 'content': 0.1898784637451172, 'timestamp': '2025-10-01 04:12:52.999067', 'step': 269, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:12:53.032577', 'step': 269, 'epoch': 1} {'type': 'loss', 'content': 0.14522893726825714, 'timestamp': '2025-10-01 04:12:53.034608', 'step': 270, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:12:53.069832', 'step': 270, 'epoch': 1} {'type': 'loss', 'content': 0.25682565569877625, 'timestamp': '2025-10-01 04:12:53.072130', 'step': 271, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:12:53.103057', 'step': 271, 'epoch': 1} {'type': 'loss', 'content': 0.23005825281143188, 'timestamp': '2025-10-01 04:12:53.126496', 'step': 272, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:12:53.159134', 'step': 272, 'epoch': 1} {'type': 'loss', 'content': 0.1475125551223755, 'timestamp': '2025-10-01 04:12:53.161611', 'step': 273, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:12:53.193428', 'step': 273, 'epoch': 1} {'type': 'loss', 'content': 0.2007136344909668, 'timestamp': '2025-10-01 04:12:53.195426', 'step': 274, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:12:53.227959', 'step': 274, 'epoch': 1} {'type': 'loss', 'content': 0.1969897747039795, 'timestamp': '2025-10-01 04:12:53.229865', 'step': 275, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:12:53.260688', 'step': 275, 'epoch': 1} {'type': 'loss', 'content': 0.15851949155330658, 'timestamp': '2025-10-01 04:12:53.284067', 'step': 276, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:12:53.326844', 'step': 276, 'epoch': 1} {'type': 'loss', 'content': 0.13964368402957916, 'timestamp': '2025-10-01 04:12:53.338732', 'step': 277, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:12:53.369439', 'step': 277, 'epoch': 1} {'type': 'loss', 'content': 0.25672030448913574, 'timestamp': '2025-10-01 04:12:53.371341', 'step': 278, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:12:53.402342', 'step': 278, 'epoch': 1} {'type': 'loss', 'content': 0.1852806955575943, 'timestamp': '2025-10-01 04:12:53.404199', 'step': 279, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:12:53.434541', 'step': 279, 'epoch': 1} {'type': 'loss', 'content': 0.22493016719818115, 'timestamp': '2025-10-01 04:12:53.457765', 'step': 280, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:12:53.489328', 'step': 280, 'epoch': 1} {'type': 'loss', 'content': 0.1754855066537857, 'timestamp': '2025-10-01 04:12:53.491200', 'step': 281, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:12:53.531267', 'step': 281, 'epoch': 1} {'type': 'loss', 'content': 0.19267815351486206, 'timestamp': '2025-10-01 04:12:53.533221', 'step': 282, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:12:53.563992', 'step': 282, 'epoch': 1} {'type': 'loss', 'content': 0.1892746388912201, 'timestamp': '2025-10-01 04:12:53.573034', 'step': 283, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:12:53.604214', 'step': 283, 'epoch': 1} {'type': 'loss', 'content': 0.15793487429618835, 'timestamp': '2025-10-01 04:12:53.634311', 'step': 284, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:12:53.665714', 'step': 284, 'epoch': 1} {'type': 'loss', 'content': 0.22123542428016663, 'timestamp': '2025-10-01 04:12:53.667734', 'step': 285, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:12:53.698318', 'step': 285, 'epoch': 1} {'type': 'loss', 'content': 0.2833268642425537, 'timestamp': '2025-10-01 04:12:53.703529', 'step': 286, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:12:53.740371', 'step': 286, 'epoch': 1} {'type': 'loss', 'content': 0.32814210653305054, 'timestamp': '2025-10-01 04:12:53.742753', 'step': 287, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:12:53.774352', 'step': 287, 'epoch': 1} {'type': 'loss', 'content': 0.3205137252807617, 'timestamp': '2025-10-01 04:12:53.797897', 'step': 288, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:12:53.831957', 'step': 288, 'epoch': 1} {'type': 'loss', 'content': 0.2728491425514221, 'timestamp': '2025-10-01 04:12:53.833968', 'step': 289, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:12:53.863852', 'step': 289, 'epoch': 1} {'type': 'loss', 'content': 0.15184339880943298, 'timestamp': '2025-10-01 04:12:53.866124', 'step': 290, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:12:53.896046', 'step': 290, 'epoch': 1} {'type': 'loss', 'content': 0.1617894470691681, 'timestamp': '2025-10-01 04:12:53.897872', 'step': 291, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:12:53.931091', 'step': 291, 'epoch': 1} {'type': 'loss', 'content': 0.27978160977363586, 'timestamp': '2025-10-01 04:12:53.958354', 'step': 292, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:12:53.988244', 'step': 292, 'epoch': 1} {'type': 'loss', 'content': 0.33328938484191895, 'timestamp': '2025-10-01 04:12:53.990036', 'step': 293, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:12:54.020786', 'step': 293, 'epoch': 1} {'type': 'loss', 'content': 0.19903375208377838, 'timestamp': '2025-10-01 04:12:54.024867', 'step': 294, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:12:54.054589', 'step': 294, 'epoch': 1} {'type': 'loss', 'content': 0.1866399198770523, 'timestamp': '2025-10-01 04:12:54.056746', 'step': 295, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:12:54.087770', 'step': 295, 'epoch': 1} {'type': 'loss', 'content': 0.17042002081871033, 'timestamp': '2025-10-01 04:12:54.110969', 'step': 296, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:12:54.143877', 'step': 296, 'epoch': 1} {'type': 'loss', 'content': 0.16628654301166534, 'timestamp': '2025-10-01 04:12:54.145856', 'step': 297, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:12:54.181933', 'step': 297, 'epoch': 1} {'type': 'loss', 'content': 0.13510073721408844, 'timestamp': '2025-10-01 04:12:54.183779', 'step': 298, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:12:54.214185', 'step': 298, 'epoch': 1} {'type': 'loss', 'content': 0.2362198829650879, 'timestamp': '2025-10-01 04:12:54.216036', 'step': 299, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:12:54.252158', 'step': 299, 'epoch': 1} {'type': 'loss', 'content': 0.22118237614631653, 'timestamp': '2025-10-01 04:12:54.275535', 'step': 300, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:12:54.309483', 'step': 300, 'epoch': 1} {'type': 'loss', 'content': 0.2933639883995056, 'timestamp': '2025-10-01 04:12:54.311469', 'step': 301, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:12:54.345589', 'step': 301, 'epoch': 1} {'type': 'loss', 'content': 0.2336738258600235, 'timestamp': '2025-10-01 04:12:54.352103', 'step': 302, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:12:54.390902', 'step': 302, 'epoch': 1} {'type': 'loss', 'content': 0.21477709710597992, 'timestamp': '2025-10-01 04:12:54.392897', 'step': 303, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:12:54.424251', 'step': 303, 'epoch': 1} {'type': 'loss', 'content': 0.18551884591579437, 'timestamp': '2025-10-01 04:12:54.447919', 'step': 304, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 12814563338304}, 'timestamp': '2025-10-01 04:12:54.500277', 'step': 304, 'epoch': 1} {'type': 'loss', 'content': 0.21852266788482666, 'timestamp': '2025-10-01 04:12:54.511247', 'step': 305, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:12:54.544662', 'step': 305, 'epoch': 1} {'type': 'loss', 'content': 0.2816569209098816, 'timestamp': '2025-10-01 04:12:54.546473', 'step': 306, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:12:54.582924', 'step': 306, 'epoch': 1} {'type': 'loss', 'content': 0.23053458333015442, 'timestamp': '2025-10-01 04:12:54.584921', 'step': 307, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:12:54.626810', 'step': 307, 'epoch': 1} {'type': 'loss', 'content': 0.1671282798051834, 'timestamp': '2025-10-01 04:12:54.650243', 'step': 308, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:12:54.691655', 'step': 308, 'epoch': 1} {'type': 'loss', 'content': 0.16549152135849, 'timestamp': '2025-10-01 04:12:54.697915', 'step': 309, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:12:54.734271', 'step': 309, 'epoch': 1} {'type': 'loss', 'content': 0.21364976465702057, 'timestamp': '2025-10-01 04:12:54.736152', 'step': 310, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:12:54.768653', 'step': 310, 'epoch': 1} {'type': 'loss', 'content': 0.2083757370710373, 'timestamp': '2025-10-01 04:12:54.770643', 'step': 311, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:12:54.802699', 'step': 311, 'epoch': 1} {'type': 'loss', 'content': 0.16973207890987396, 'timestamp': '2025-10-01 04:12:54.826773', 'step': 312, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:12:54.857845', 'step': 312, 'epoch': 1} {'type': 'loss', 'content': 0.13640879094600677, 'timestamp': '2025-10-01 04:12:54.859997', 'step': 313, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:12:54.891686', 'step': 313, 'epoch': 1} {'type': 'loss', 'content': 0.20195233821868896, 'timestamp': '2025-10-01 04:12:54.893547', 'step': 314, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:12:54.923958', 'step': 314, 'epoch': 1} {'type': 'loss', 'content': 0.2601877450942993, 'timestamp': '2025-10-01 04:12:54.925936', 'step': 315, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:12:54.957067', 'step': 315, 'epoch': 1} {'type': 'loss', 'content': 0.1500694900751114, 'timestamp': '2025-10-01 04:12:54.980595', 'step': 316, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:12:55.012211', 'step': 316, 'epoch': 1} {'type': 'loss', 'content': 0.17680387198925018, 'timestamp': '2025-10-01 04:12:55.014231', 'step': 317, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:12:55.048008', 'step': 317, 'epoch': 1} {'type': 'loss', 'content': 0.13308314979076385, 'timestamp': '2025-10-01 04:12:55.050096', 'step': 318, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:12:55.092374', 'step': 318, 'epoch': 1} {'type': 'loss', 'content': 0.20252437889575958, 'timestamp': '2025-10-01 04:12:55.094323', 'step': 319, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:12:55.126309', 'step': 319, 'epoch': 1} {'type': 'loss', 'content': 0.24441730976104736, 'timestamp': '2025-10-01 04:12:55.159360', 'step': 320, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:12:55.195821', 'step': 320, 'epoch': 1} {'type': 'loss', 'content': 0.21689459681510925, 'timestamp': '2025-10-01 04:12:55.197832', 'step': 321, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:12:55.230408', 'step': 321, 'epoch': 1} {'type': 'loss', 'content': 0.17164559662342072, 'timestamp': '2025-10-01 04:12:55.232418', 'step': 322, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:12:55.269235', 'step': 322, 'epoch': 1} {'type': 'loss', 'content': 0.1940157562494278, 'timestamp': '2025-10-01 04:12:55.271138', 'step': 323, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:12:55.304481', 'step': 323, 'epoch': 1} {'type': 'loss', 'content': 0.16471779346466064, 'timestamp': '2025-10-01 04:12:55.328223', 'step': 324, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:12:55.362071', 'step': 324, 'epoch': 1} {'type': 'loss', 'content': 0.16165125370025635, 'timestamp': '2025-10-01 04:12:55.364305', 'step': 325, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:12:55.395259', 'step': 325, 'epoch': 1} {'type': 'loss', 'content': 0.2638111710548401, 'timestamp': '2025-10-01 04:12:55.397144', 'step': 326, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:12:55.430546', 'step': 326, 'epoch': 1} {'type': 'loss', 'content': 0.12288150936365128, 'timestamp': '2025-10-01 04:12:55.432620', 'step': 327, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:12:55.462856', 'step': 327, 'epoch': 1} {'type': 'loss', 'content': 0.22460418939590454, 'timestamp': '2025-10-01 04:12:55.486861', 'step': 328, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:12:55.520358', 'step': 328, 'epoch': 1} {'type': 'loss', 'content': 0.26227518916130066, 'timestamp': '2025-10-01 04:12:55.522520', 'step': 329, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:12:55.552765', 'step': 329, 'epoch': 1} {'type': 'loss', 'content': 0.14533501863479614, 'timestamp': '2025-10-01 04:12:55.555157', 'step': 330, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:12:55.585304', 'step': 330, 'epoch': 1} {'type': 'loss', 'content': 0.18707606196403503, 'timestamp': '2025-10-01 04:12:55.587853', 'step': 331, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:12:55.619105', 'step': 331, 'epoch': 1} {'type': 'loss', 'content': 0.24511724710464478, 'timestamp': '2025-10-01 04:12:55.644694', 'step': 332, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:12:55.675043', 'step': 332, 'epoch': 1} {'type': 'loss', 'content': 0.14086653292179108, 'timestamp': '2025-10-01 04:12:55.677152', 'step': 333, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:12:55.706958', 'step': 333, 'epoch': 1} {'type': 'loss', 'content': 0.26986172795295715, 'timestamp': '2025-10-01 04:12:55.708701', 'step': 334, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:12:55.739038', 'step': 334, 'epoch': 1} {'type': 'loss', 'content': 0.11836495250463486, 'timestamp': '2025-10-01 04:12:55.741000', 'step': 335, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:12:55.771594', 'step': 335, 'epoch': 1} {'type': 'loss', 'content': 0.1891240030527115, 'timestamp': '2025-10-01 04:12:55.795241', 'step': 336, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:12:55.826587', 'step': 336, 'epoch': 1} {'type': 'loss', 'content': 0.20784872770309448, 'timestamp': '2025-10-01 04:12:55.828767', 'step': 337, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:12:55.859704', 'step': 337, 'epoch': 1} {'type': 'loss', 'content': 0.16231629252433777, 'timestamp': '2025-10-01 04:12:55.862334', 'step': 338, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:12:55.893577', 'step': 338, 'epoch': 1} {'type': 'loss', 'content': 0.293427973985672, 'timestamp': '2025-10-01 04:12:55.896098', 'step': 339, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:12:55.929055', 'step': 339, 'epoch': 1} {'type': 'loss', 'content': 0.26065537333488464, 'timestamp': '2025-10-01 04:12:55.952622', 'step': 340, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:12:55.984223', 'step': 340, 'epoch': 1} {'type': 'loss', 'content': 0.21553653478622437, 'timestamp': '2025-10-01 04:12:55.987031', 'step': 341, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:12:56.018155', 'step': 341, 'epoch': 1} {'type': 'loss', 'content': 0.15937955677509308, 'timestamp': '2025-10-01 04:12:56.020383', 'step': 342, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:12:56.052773', 'step': 342, 'epoch': 1} {'type': 'loss', 'content': 0.18080537021160126, 'timestamp': '2025-10-01 04:12:56.054839', 'step': 343, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:12:56.091457', 'step': 343, 'epoch': 1} {'type': 'loss', 'content': 0.1470547914505005, 'timestamp': '2025-10-01 04:12:56.115109', 'step': 344, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:12:56.151745', 'step': 344, 'epoch': 1} {'type': 'loss', 'content': 0.2819662094116211, 'timestamp': '2025-10-01 04:12:56.155700', 'step': 345, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:12:56.186681', 'step': 345, 'epoch': 1} {'type': 'loss', 'content': 0.23036381602287292, 'timestamp': '2025-10-01 04:12:56.188807', 'step': 346, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:12:56.222427', 'step': 346, 'epoch': 1} {'type': 'loss', 'content': 0.26625633239746094, 'timestamp': '2025-10-01 04:12:56.224876', 'step': 347, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:12:56.259687', 'step': 347, 'epoch': 1} {'type': 'loss', 'content': 0.20773574709892273, 'timestamp': '2025-10-01 04:12:56.283553', 'step': 348, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:12:56.324038', 'step': 348, 'epoch': 1} {'type': 'loss', 'content': 0.18646208941936493, 'timestamp': '2025-10-01 04:12:56.329654', 'step': 349, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:12:56.364169', 'step': 349, 'epoch': 1} {'type': 'loss', 'content': 0.16010884940624237, 'timestamp': '2025-10-01 04:12:56.366228', 'step': 350, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:12:56.397688', 'step': 350, 'epoch': 1} {'type': 'loss', 'content': 0.17775070667266846, 'timestamp': '2025-10-01 04:12:56.400013', 'step': 351, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:12:56.431519', 'step': 351, 'epoch': 1} {'type': 'loss', 'content': 0.1906484067440033, 'timestamp': '2025-10-01 04:12:56.455104', 'step': 352, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:12:56.486308', 'step': 352, 'epoch': 1} {'type': 'loss', 'content': 0.12603820860385895, 'timestamp': '2025-10-01 04:12:56.488365', 'step': 353, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:12:56.519003', 'step': 353, 'epoch': 1} {'type': 'loss', 'content': 0.11436907202005386, 'timestamp': '2025-10-01 04:12:56.521185', 'step': 354, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:12:56.552946', 'step': 354, 'epoch': 1} {'type': 'loss', 'content': 0.1945236325263977, 'timestamp': '2025-10-01 04:12:56.555257', 'step': 355, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:12:56.587453', 'step': 355, 'epoch': 1} {'type': 'loss', 'content': 0.3584557771682739, 'timestamp': '2025-10-01 04:12:56.611113', 'step': 356, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:12:56.655827', 'step': 356, 'epoch': 1} {'type': 'loss', 'content': 0.20734794437885284, 'timestamp': '2025-10-01 04:12:56.658141', 'step': 357, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:12:56.690730', 'step': 357, 'epoch': 1} {'type': 'loss', 'content': 0.08548590540885925, 'timestamp': '2025-10-01 04:12:56.692774', 'step': 358, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:12:56.724932', 'step': 358, 'epoch': 1} {'type': 'loss', 'content': 0.2171408236026764, 'timestamp': '2025-10-01 04:12:56.726929', 'step': 359, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:12:56.758209', 'step': 359, 'epoch': 1} {'type': 'loss', 'content': 0.21880632638931274, 'timestamp': '2025-10-01 04:12:56.782239', 'step': 360, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:12:56.813231', 'step': 360, 'epoch': 1} {'type': 'loss', 'content': 0.15781912207603455, 'timestamp': '2025-10-01 04:12:56.815510', 'step': 361, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:12:56.852076', 'step': 361, 'epoch': 1} {'type': 'loss', 'content': 0.19007812440395355, 'timestamp': '2025-10-01 04:12:56.854257', 'step': 362, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:12:56.885737', 'step': 362, 'epoch': 1} {'type': 'loss', 'content': 0.13694213330745697, 'timestamp': '2025-10-01 04:12:56.887935', 'step': 363, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:12:56.919274', 'step': 363, 'epoch': 1} {'type': 'loss', 'content': 0.1935303956270218, 'timestamp': '2025-10-01 04:12:56.943444', 'step': 364, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:12:56.975013', 'step': 364, 'epoch': 1} {'type': 'loss', 'content': 0.27509766817092896, 'timestamp': '2025-10-01 04:12:56.977045', 'step': 365, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:12:57.008915', 'step': 365, 'epoch': 1} {'type': 'loss', 'content': 0.17666441202163696, 'timestamp': '2025-10-01 04:12:57.011228', 'step': 366, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:12:57.044014', 'step': 366, 'epoch': 1} {'type': 'loss', 'content': 0.3179033696651459, 'timestamp': '2025-10-01 04:12:57.046775', 'step': 367, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:12:57.083960', 'step': 367, 'epoch': 1} {'type': 'loss', 'content': 0.14795637130737305, 'timestamp': '2025-10-01 04:12:57.107688', 'step': 368, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:12:57.142238', 'step': 368, 'epoch': 1} {'type': 'loss', 'content': 0.24731317162513733, 'timestamp': '2025-10-01 04:12:57.144273', 'step': 369, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:12:57.176068', 'step': 369, 'epoch': 1} {'type': 'loss', 'content': 0.12271597981452942, 'timestamp': '2025-10-01 04:12:57.177979', 'step': 370, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:12:57.207429', 'step': 370, 'epoch': 1} {'type': 'loss', 'content': 0.2199622094631195, 'timestamp': '2025-10-01 04:12:57.209423', 'step': 371, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:12:57.240582', 'step': 371, 'epoch': 1} {'type': 'loss', 'content': 0.26048311591148376, 'timestamp': '2025-10-01 04:12:57.264555', 'step': 372, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:12:57.300504', 'step': 372, 'epoch': 1} {'type': 'loss', 'content': 0.14329352974891663, 'timestamp': '2025-10-01 04:12:57.302872', 'step': 373, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:12:57.334333', 'step': 373, 'epoch': 1} {'type': 'loss', 'content': 0.18771284818649292, 'timestamp': '2025-10-01 04:12:57.336256', 'step': 374, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:12:57.367194', 'step': 374, 'epoch': 1} {'type': 'loss', 'content': 0.15064892172813416, 'timestamp': '2025-10-01 04:12:57.368979', 'step': 375, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:12:57.400559', 'step': 375, 'epoch': 1} {'type': 'loss', 'content': 0.13638384640216827, 'timestamp': '2025-10-01 04:12:57.424046', 'step': 376, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:12:57.455647', 'step': 376, 'epoch': 1} {'type': 'loss', 'content': 0.20855724811553955, 'timestamp': '2025-10-01 04:12:57.457815', 'step': 377, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:12:57.492069', 'step': 377, 'epoch': 1} {'type': 'loss', 'content': 0.16640616953372955, 'timestamp': '2025-10-01 04:12:57.494012', 'step': 378, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:12:57.527045', 'step': 378, 'epoch': 1} {'type': 'loss', 'content': 0.19392414391040802, 'timestamp': '2025-10-01 04:12:57.528941', 'step': 379, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:12:57.561643', 'step': 379, 'epoch': 1} {'type': 'loss', 'content': 0.28295761346817017, 'timestamp': '2025-10-01 04:12:57.584884', 'step': 380, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:12:57.626263', 'step': 380, 'epoch': 1} {'type': 'loss', 'content': 0.23700951039791107, 'timestamp': '2025-10-01 04:12:57.628152', 'step': 381, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:12:57.660135', 'step': 381, 'epoch': 1} {'type': 'loss', 'content': 0.32815125584602356, 'timestamp': '2025-10-01 04:12:57.662082', 'step': 382, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:12:57.693676', 'step': 382, 'epoch': 1} {'type': 'loss', 'content': 0.14320893585681915, 'timestamp': '2025-10-01 04:12:57.695560', 'step': 383, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:12:57.726270', 'step': 383, 'epoch': 1} {'type': 'loss', 'content': 0.2413434088230133, 'timestamp': '2025-10-01 04:12:57.749799', 'step': 384, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:12:57.781386', 'step': 384, 'epoch': 1} {'type': 'loss', 'content': 0.20311124622821808, 'timestamp': '2025-10-01 04:12:57.783265', 'step': 385, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:12:57.815328', 'step': 385, 'epoch': 1} {'type': 'loss', 'content': 0.1817169040441513, 'timestamp': '2025-10-01 04:12:57.817241', 'step': 386, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:12:57.848247', 'step': 386, 'epoch': 1} {'type': 'loss', 'content': 0.12320336699485779, 'timestamp': '2025-10-01 04:12:57.850978', 'step': 387, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:12:57.881331', 'step': 387, 'epoch': 1} {'type': 'loss', 'content': 0.22648850083351135, 'timestamp': '2025-10-01 04:12:57.906349', 'step': 388, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:12:57.943431', 'step': 388, 'epoch': 1} {'type': 'loss', 'content': 0.17749907076358795, 'timestamp': '2025-10-01 04:12:57.945362', 'step': 389, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:12:57.976104', 'step': 389, 'epoch': 1} {'type': 'loss', 'content': 0.19427284598350525, 'timestamp': '2025-10-01 04:12:57.978452', 'step': 390, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:12:58.009676', 'step': 390, 'epoch': 1} {'type': 'loss', 'content': 0.2209610939025879, 'timestamp': '2025-10-01 04:12:58.013244', 'step': 391, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:12:58.044481', 'step': 391, 'epoch': 1} {'type': 'loss', 'content': 0.2670728862285614, 'timestamp': '2025-10-01 04:12:58.068055', 'step': 392, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:12:58.098837', 'step': 392, 'epoch': 1} {'type': 'loss', 'content': 0.17608638107776642, 'timestamp': '2025-10-01 04:12:58.100846', 'step': 393, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:12:58.150067', 'step': 393, 'epoch': 1} {'type': 'loss', 'content': 0.14843808114528656, 'timestamp': '2025-10-01 04:12:58.152060', 'step': 394, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:12:58.184720', 'step': 394, 'epoch': 1} {'type': 'loss', 'content': 0.2952413260936737, 'timestamp': '2025-10-01 04:12:58.187111', 'step': 395, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:12:58.229480', 'step': 395, 'epoch': 1} {'type': 'loss', 'content': 0.11253459006547928, 'timestamp': '2025-10-01 04:12:58.252928', 'step': 396, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:12:58.284064', 'step': 396, 'epoch': 1} {'type': 'loss', 'content': 0.15828444063663483, 'timestamp': '2025-10-01 04:12:58.285914', 'step': 397, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:12:58.318428', 'step': 397, 'epoch': 1} {'type': 'loss', 'content': 0.2598855197429657, 'timestamp': '2025-10-01 04:12:58.320357', 'step': 398, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:12:58.358219', 'step': 398, 'epoch': 1} {'type': 'loss', 'content': 0.2200826108455658, 'timestamp': '2025-10-01 04:12:58.360138', 'step': 399, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:12:58.397086', 'step': 399, 'epoch': 1} {'type': 'loss', 'content': 0.15019740164279938, 'timestamp': '2025-10-01 04:12:58.420409', 'step': 400, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:12:58.457802', 'step': 400, 'epoch': 1} {'type': 'loss', 'content': 0.24933072924613953, 'timestamp': '2025-10-01 04:12:58.460332', 'step': 401, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:12:58.493576', 'step': 401, 'epoch': 1} {'type': 'loss', 'content': 0.19717195630073547, 'timestamp': '2025-10-01 04:12:58.495474', 'step': 402, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:12:58.539868', 'step': 402, 'epoch': 1} {'type': 'loss', 'content': 0.17047610878944397, 'timestamp': '2025-10-01 04:12:58.542742', 'step': 403, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:12:58.584952', 'step': 403, 'epoch': 1} {'type': 'loss', 'content': 0.1441061794757843, 'timestamp': '2025-10-01 04:12:58.608455', 'step': 404, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:12:58.639680', 'step': 404, 'epoch': 1} {'type': 'loss', 'content': 0.19802208244800568, 'timestamp': '2025-10-01 04:12:58.641766', 'step': 405, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:12:58.673296', 'step': 405, 'epoch': 1} {'type': 'loss', 'content': 0.20370379090309143, 'timestamp': '2025-10-01 04:12:58.675674', 'step': 406, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:12:58.706359', 'step': 406, 'epoch': 1} {'type': 'loss', 'content': 0.14764028787612915, 'timestamp': '2025-10-01 04:12:58.708223', 'step': 407, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:12:58.744757', 'step': 407, 'epoch': 1} {'type': 'loss', 'content': 0.19219720363616943, 'timestamp': '2025-10-01 04:12:58.768762', 'step': 408, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:12:58.803026', 'step': 408, 'epoch': 1} {'type': 'loss', 'content': 0.14860543608665466, 'timestamp': '2025-10-01 04:12:58.804922', 'step': 409, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:12:58.844319', 'step': 409, 'epoch': 1} {'type': 'loss', 'content': 0.20786231756210327, 'timestamp': '2025-10-01 04:12:58.846251', 'step': 410, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:12:58.877015', 'step': 410, 'epoch': 1} {'type': 'loss', 'content': 0.2527414858341217, 'timestamp': '2025-10-01 04:12:58.882890', 'step': 411, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:12:58.913845', 'step': 411, 'epoch': 1} {'type': 'loss', 'content': 0.19612865149974823, 'timestamp': '2025-10-01 04:12:58.937237', 'step': 412, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:12:58.967534', 'step': 412, 'epoch': 1} {'type': 'loss', 'content': 0.1992037147283554, 'timestamp': '2025-10-01 04:12:58.969739', 'step': 413, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:12:58.999721', 'step': 413, 'epoch': 1} {'type': 'loss', 'content': 0.213658407330513, 'timestamp': '2025-10-01 04:12:59.001665', 'step': 414, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:12:59.047749', 'step': 414, 'epoch': 1} {'type': 'loss', 'content': 0.1644250601530075, 'timestamp': '2025-10-01 04:12:59.050014', 'step': 415, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:12:59.108419', 'step': 415, 'epoch': 1} {'type': 'loss', 'content': 0.1665400117635727, 'timestamp': '2025-10-01 04:12:59.131936', 'step': 416, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:12:59.164826', 'step': 416, 'epoch': 1} {'type': 'loss', 'content': 0.11473795771598816, 'timestamp': '2025-10-01 04:12:59.167150', 'step': 417, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:12:59.202925', 'step': 417, 'epoch': 1} {'type': 'loss', 'content': 0.18866576254367828, 'timestamp': '2025-10-01 04:12:59.205360', 'step': 418, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:12:59.236528', 'step': 418, 'epoch': 1} {'type': 'loss', 'content': 0.11637260764837265, 'timestamp': '2025-10-01 04:12:59.238994', 'step': 419, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:12:59.270658', 'step': 419, 'epoch': 1} {'type': 'loss', 'content': 0.15352632105350494, 'timestamp': '2025-10-01 04:12:59.294029', 'step': 420, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:12:59.325005', 'step': 420, 'epoch': 1} {'type': 'loss', 'content': 0.17056255042552948, 'timestamp': '2025-10-01 04:12:59.327013', 'step': 421, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:12:59.359810', 'step': 421, 'epoch': 1} {'type': 'loss', 'content': 0.20575369894504547, 'timestamp': '2025-10-01 04:12:59.361744', 'step': 422, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:12:59.394599', 'step': 422, 'epoch': 1} {'type': 'loss', 'content': 0.23700135946273804, 'timestamp': '2025-10-01 04:12:59.396529', 'step': 423, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:12:59.432535', 'step': 423, 'epoch': 1} {'type': 'loss', 'content': 0.16990335285663605, 'timestamp': '2025-10-01 04:12:59.455978', 'step': 424, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:12:59.491319', 'step': 424, 'epoch': 1} {'type': 'loss', 'content': 0.1479709893465042, 'timestamp': '2025-10-01 04:12:59.493185', 'step': 425, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:12:59.531244', 'step': 425, 'epoch': 1} {'type': 'loss', 'content': 0.20625178515911102, 'timestamp': '2025-10-01 04:12:59.533173', 'step': 426, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:12:59.564888', 'step': 426, 'epoch': 1} {'type': 'loss', 'content': 0.34484484791755676, 'timestamp': '2025-10-01 04:12:59.566938', 'step': 427, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:12:59.608110', 'step': 427, 'epoch': 1} {'type': 'loss', 'content': 0.22376540303230286, 'timestamp': '2025-10-01 04:12:59.633004', 'step': 428, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:12:59.664032', 'step': 428, 'epoch': 1} {'type': 'loss', 'content': 0.17609445750713348, 'timestamp': '2025-10-01 04:12:59.665885', 'step': 429, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:12:59.697374', 'step': 429, 'epoch': 1} {'type': 'loss', 'content': 0.1254003793001175, 'timestamp': '2025-10-01 04:12:59.702479', 'step': 430, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:12:59.735419', 'step': 430, 'epoch': 1} {'type': 'loss', 'content': 0.20795980095863342, 'timestamp': '2025-10-01 04:12:59.738302', 'step': 431, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:12:59.770586', 'step': 431, 'epoch': 1} {'type': 'loss', 'content': 0.160133957862854, 'timestamp': '2025-10-01 04:12:59.794016', 'step': 432, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:12:59.826456', 'step': 432, 'epoch': 1} {'type': 'loss', 'content': 0.13430675864219666, 'timestamp': '2025-10-01 04:12:59.828468', 'step': 433, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:12:59.861293', 'step': 433, 'epoch': 1} {'type': 'loss', 'content': 0.16719204187393188, 'timestamp': '2025-10-01 04:12:59.863344', 'step': 434, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:12:59.894514', 'step': 434, 'epoch': 1} {'type': 'loss', 'content': 0.3192874491214752, 'timestamp': '2025-10-01 04:12:59.900938', 'step': 435, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:12:59.934072', 'step': 435, 'epoch': 1} {'type': 'loss', 'content': 0.11484664678573608, 'timestamp': '2025-10-01 04:12:59.957611', 'step': 436, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:12:59.990316', 'step': 436, 'epoch': 1} {'type': 'loss', 'content': 0.22936123609542847, 'timestamp': '2025-10-01 04:12:59.992295', 'step': 437, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:00.022641', 'step': 437, 'epoch': 1} {'type': 'loss', 'content': 0.23262782394886017, 'timestamp': '2025-10-01 04:13:00.025846', 'step': 438, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:13:00.059050', 'step': 438, 'epoch': 1} {'type': 'loss', 'content': 0.13008837401866913, 'timestamp': '2025-10-01 04:13:00.061584', 'step': 439, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:00.095278', 'step': 439, 'epoch': 1} {'type': 'loss', 'content': 0.24390380084514618, 'timestamp': '2025-10-01 04:13:00.122398', 'step': 440, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:00.151992', 'step': 440, 'epoch': 1} {'type': 'loss', 'content': 0.17821823060512543, 'timestamp': '2025-10-01 04:13:00.153854', 'step': 441, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:13:00.183808', 'step': 441, 'epoch': 1} {'type': 'loss', 'content': 0.180145725607872, 'timestamp': '2025-10-01 04:13:00.185576', 'step': 442, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:13:00.217359', 'step': 442, 'epoch': 1} {'type': 'loss', 'content': 0.17576342821121216, 'timestamp': '2025-10-01 04:13:00.219297', 'step': 443, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:13:00.250961', 'step': 443, 'epoch': 1} {'type': 'loss', 'content': 0.20495587587356567, 'timestamp': '2025-10-01 04:13:00.274385', 'step': 444, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:13:00.307479', 'step': 444, 'epoch': 1} {'type': 'loss', 'content': 0.2623063027858734, 'timestamp': '2025-10-01 04:13:00.309756', 'step': 445, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:13:00.349381', 'step': 445, 'epoch': 1} {'type': 'loss', 'content': 0.24776339530944824, 'timestamp': '2025-10-01 04:13:00.351374', 'step': 446, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:13:00.392536', 'step': 446, 'epoch': 1} {'type': 'loss', 'content': 0.2564249038696289, 'timestamp': '2025-10-01 04:13:00.394557', 'step': 447, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:13:00.435581', 'step': 447, 'epoch': 1} {'type': 'loss', 'content': 0.17913782596588135, 'timestamp': '2025-10-01 04:13:00.459275', 'step': 448, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:00.490529', 'step': 448, 'epoch': 1} {'type': 'loss', 'content': 0.259932279586792, 'timestamp': '2025-10-01 04:13:00.492570', 'step': 449, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:00.532895', 'step': 449, 'epoch': 1} {'type': 'loss', 'content': 0.171049103140831, 'timestamp': '2025-10-01 04:13:00.534767', 'step': 450, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:00.568285', 'step': 450, 'epoch': 1} {'type': 'loss', 'content': 0.182845801115036, 'timestamp': '2025-10-01 04:13:00.570086', 'step': 451, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:13:00.608415', 'step': 451, 'epoch': 1} {'type': 'loss', 'content': 0.21972422301769257, 'timestamp': '2025-10-01 04:13:00.631776', 'step': 452, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:00.661882', 'step': 452, 'epoch': 1} {'type': 'loss', 'content': 0.23100616037845612, 'timestamp': '2025-10-01 04:13:00.663883', 'step': 453, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:13:00.693928', 'step': 453, 'epoch': 1} {'type': 'loss', 'content': 0.19024154543876648, 'timestamp': '2025-10-01 04:13:00.696611', 'step': 454, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:13:00.727783', 'step': 454, 'epoch': 1} {'type': 'loss', 'content': 0.22976182401180267, 'timestamp': '2025-10-01 04:13:00.730214', 'step': 455, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:00.761166', 'step': 455, 'epoch': 1} {'type': 'loss', 'content': 0.23533491790294647, 'timestamp': '2025-10-01 04:13:00.784655', 'step': 456, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:13:00.815942', 'step': 456, 'epoch': 1} {'type': 'loss', 'content': 0.24367955327033997, 'timestamp': '2025-10-01 04:13:00.817869', 'step': 457, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:00.849677', 'step': 457, 'epoch': 1} {'type': 'loss', 'content': 0.19103240966796875, 'timestamp': '2025-10-01 04:13:00.851644', 'step': 458, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:13:00.884196', 'step': 458, 'epoch': 1} {'type': 'loss', 'content': 0.24220427870750427, 'timestamp': '2025-10-01 04:13:00.886028', 'step': 459, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:13:00.918159', 'step': 459, 'epoch': 1} {'type': 'loss', 'content': 0.23861514031887054, 'timestamp': '2025-10-01 04:13:00.941636', 'step': 460, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:13:00.973724', 'step': 460, 'epoch': 1} {'type': 'loss', 'content': 0.23018188774585724, 'timestamp': '2025-10-01 04:13:00.975638', 'step': 461, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:01.006712', 'step': 461, 'epoch': 1} {'type': 'loss', 'content': 0.23727451264858246, 'timestamp': '2025-10-01 04:13:01.008652', 'step': 462, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:01.040703', 'step': 462, 'epoch': 1} {'type': 'loss', 'content': 0.16902713477611542, 'timestamp': '2025-10-01 04:13:01.042592', 'step': 463, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:01.078111', 'step': 463, 'epoch': 1} {'type': 'loss', 'content': 0.24754638969898224, 'timestamp': '2025-10-01 04:13:01.101647', 'step': 464, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:13:01.132935', 'step': 464, 'epoch': 1} {'type': 'loss', 'content': 0.1431966871023178, 'timestamp': '2025-10-01 04:13:01.135139', 'step': 465, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:01.168310', 'step': 465, 'epoch': 1} {'type': 'loss', 'content': 0.20611368119716644, 'timestamp': '2025-10-01 04:13:01.170161', 'step': 466, 'epoch': 1} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 949202279808}], 'timestamp': '2025-10-01 04:13:10.552021', 'step': 466, 'epoch': 1} {'type': 'pplx', 'content': 7920.79428167882, 'timestamp': '2025-10-01 04:13:10.554763', 'step': 466, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:10.595390', 'step': 466, 'epoch': 1} {'type': 'loss', 'content': 0.21690191328525543, 'timestamp': '2025-10-01 04:13:10.597338', 'step': 467, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:10.632585', 'step': 467, 'epoch': 1} {'type': 'loss', 'content': 0.18691514432430267, 'timestamp': '2025-10-01 04:13:10.656153', 'step': 468, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:10.736892', 'step': 468, 'epoch': 1} {'type': 'loss', 'content': 0.2074434757232666, 'timestamp': '2025-10-01 04:13:10.739047', 'step': 469, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:10.788440', 'step': 469, 'epoch': 1} {'type': 'loss', 'content': 0.10530637949705124, 'timestamp': '2025-10-01 04:13:10.790461', 'step': 470, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:10.821292', 'step': 470, 'epoch': 1} {'type': 'loss', 'content': 0.23715797066688538, 'timestamp': '2025-10-01 04:13:10.830653', 'step': 471, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:10.891187', 'step': 471, 'epoch': 1} {'type': 'loss', 'content': 0.23669812083244324, 'timestamp': '2025-10-01 04:13:10.914728', 'step': 472, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:13:10.956871', 'step': 472, 'epoch': 1} {'type': 'loss', 'content': 0.17904002964496613, 'timestamp': '2025-10-01 04:13:10.959818', 'step': 473, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:10.994233', 'step': 473, 'epoch': 1} {'type': 'loss', 'content': 0.09829526394605637, 'timestamp': '2025-10-01 04:13:10.997026', 'step': 474, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:13:11.028831', 'step': 474, 'epoch': 1} {'type': 'loss', 'content': 0.28911805152893066, 'timestamp': '2025-10-01 04:13:11.031226', 'step': 475, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:11.071213', 'step': 475, 'epoch': 1} {'type': 'loss', 'content': 0.16506659984588623, 'timestamp': '2025-10-01 04:13:11.096296', 'step': 476, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:11.144112', 'step': 476, 'epoch': 1} {'type': 'loss', 'content': 0.15807044506072998, 'timestamp': '2025-10-01 04:13:11.157174', 'step': 477, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:11.199683', 'step': 477, 'epoch': 1} {'type': 'loss', 'content': 0.12698771059513092, 'timestamp': '2025-10-01 04:13:11.202371', 'step': 478, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:13:11.239162', 'step': 478, 'epoch': 1} {'type': 'loss', 'content': 0.1608838140964508, 'timestamp': '2025-10-01 04:13:11.242055', 'step': 479, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:11.277040', 'step': 479, 'epoch': 1} {'type': 'loss', 'content': 0.3184767961502075, 'timestamp': '2025-10-01 04:13:11.302993', 'step': 480, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:13:11.370954', 'step': 480, 'epoch': 1} {'type': 'loss', 'content': 0.2140602469444275, 'timestamp': '2025-10-01 04:13:11.373371', 'step': 481, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:13:11.408336', 'step': 481, 'epoch': 1} {'type': 'loss', 'content': 0.1781146228313446, 'timestamp': '2025-10-01 04:13:11.419340', 'step': 482, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:13:11.476231', 'step': 482, 'epoch': 1} {'type': 'loss', 'content': 0.14503711462020874, 'timestamp': '2025-10-01 04:13:11.478902', 'step': 483, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:11.518264', 'step': 483, 'epoch': 1} {'type': 'loss', 'content': 0.2536601424217224, 'timestamp': '2025-10-01 04:13:11.545994', 'step': 484, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:11.594549', 'step': 484, 'epoch': 1} {'type': 'loss', 'content': 0.17131733894348145, 'timestamp': '2025-10-01 04:13:11.609393', 'step': 485, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:13:11.652048', 'step': 485, 'epoch': 1} {'type': 'loss', 'content': 0.16235341131687164, 'timestamp': '2025-10-01 04:13:11.654245', 'step': 486, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:13:11.692216', 'step': 486, 'epoch': 1} {'type': 'loss', 'content': 0.11198431998491287, 'timestamp': '2025-10-01 04:13:11.702906', 'step': 487, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:13:11.744950', 'step': 487, 'epoch': 1} {'type': 'loss', 'content': 0.26457008719444275, 'timestamp': '2025-10-01 04:13:11.771150', 'step': 488, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:11.812393', 'step': 488, 'epoch': 1} {'type': 'loss', 'content': 0.27115392684936523, 'timestamp': '2025-10-01 04:13:11.823289', 'step': 489, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:13:11.862636', 'step': 489, 'epoch': 1} {'type': 'loss', 'content': 0.1606362760066986, 'timestamp': '2025-10-01 04:13:11.877912', 'step': 490, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:11.909991', 'step': 490, 'epoch': 1} {'type': 'loss', 'content': 0.2047857642173767, 'timestamp': '2025-10-01 04:13:11.919389', 'step': 491, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:13:11.955343', 'step': 491, 'epoch': 1} {'type': 'loss', 'content': 0.20479507744312286, 'timestamp': '2025-10-01 04:13:11.988165', 'step': 492, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:13:12.023451', 'step': 492, 'epoch': 1} {'type': 'loss', 'content': 0.12617293000221252, 'timestamp': '2025-10-01 04:13:12.026925', 'step': 493, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:12.058889', 'step': 493, 'epoch': 1} {'type': 'loss', 'content': 0.22749243676662445, 'timestamp': '2025-10-01 04:13:12.061565', 'step': 494, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:13:12.095208', 'step': 494, 'epoch': 1} {'type': 'loss', 'content': 0.2330593466758728, 'timestamp': '2025-10-01 04:13:12.097532', 'step': 495, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:13:12.128290', 'step': 495, 'epoch': 1} {'type': 'loss', 'content': 0.2597722113132477, 'timestamp': '2025-10-01 04:13:12.163915', 'step': 496, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:13:12.197364', 'step': 496, 'epoch': 1} {'type': 'loss', 'content': 0.26654866337776184, 'timestamp': '2025-10-01 04:13:12.203907', 'step': 497, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:12.235769', 'step': 497, 'epoch': 1} {'type': 'loss', 'content': 0.21124061942100525, 'timestamp': '2025-10-01 04:13:12.238918', 'step': 498, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:13:12.271932', 'step': 498, 'epoch': 1} {'type': 'loss', 'content': 0.20265281200408936, 'timestamp': '2025-10-01 04:13:12.278971', 'step': 499, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:12.313229', 'step': 499, 'epoch': 1} {'type': 'loss', 'content': 0.1525307595729828, 'timestamp': '2025-10-01 04:13:12.337802', 'step': 500, 'epoch': 1} {'type': 'info', 'content': 'Checkpoint saved at step 500', 'timestamp': '2025-10-01 04:13:17.298201', 'step': 500, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:13:17.329536', 'step': 500, 'epoch': 1} {'type': 'loss', 'content': 0.0876256451010704, 'timestamp': '2025-10-01 04:13:17.331688', 'step': 501, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:17.368711', 'step': 501, 'epoch': 1} {'type': 'loss', 'content': 0.23457759618759155, 'timestamp': '2025-10-01 04:13:17.370747', 'step': 502, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:17.401363', 'step': 502, 'epoch': 1} {'type': 'loss', 'content': 0.14609986543655396, 'timestamp': '2025-10-01 04:13:17.403422', 'step': 503, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:13:17.434077', 'step': 503, 'epoch': 1} {'type': 'loss', 'content': 0.13777616620063782, 'timestamp': '2025-10-01 04:13:17.457543', 'step': 504, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:13:17.495359', 'step': 504, 'epoch': 1} {'type': 'loss', 'content': 0.17220523953437805, 'timestamp': '2025-10-01 04:13:17.497262', 'step': 505, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:13:17.536470', 'step': 505, 'epoch': 1} {'type': 'loss', 'content': 0.19555872678756714, 'timestamp': '2025-10-01 04:13:17.538211', 'step': 506, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:17.574392', 'step': 506, 'epoch': 1} {'type': 'loss', 'content': 0.2615610361099243, 'timestamp': '2025-10-01 04:13:17.576296', 'step': 507, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:17.608797', 'step': 507, 'epoch': 1} {'type': 'loss', 'content': 0.12486719340085983, 'timestamp': '2025-10-01 04:13:17.632324', 'step': 508, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:17.669301', 'step': 508, 'epoch': 1} {'type': 'loss', 'content': 0.17470836639404297, 'timestamp': '2025-10-01 04:13:17.671187', 'step': 509, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:17.703664', 'step': 509, 'epoch': 1} {'type': 'loss', 'content': 0.1579800546169281, 'timestamp': '2025-10-01 04:13:17.705567', 'step': 510, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:13:17.740155', 'step': 510, 'epoch': 1} {'type': 'loss', 'content': 0.1192038282752037, 'timestamp': '2025-10-01 04:13:17.742191', 'step': 511, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:17.790674', 'step': 511, 'epoch': 1} {'type': 'loss', 'content': 0.1472236067056656, 'timestamp': '2025-10-01 04:13:17.814049', 'step': 512, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:17.845333', 'step': 512, 'epoch': 1} {'type': 'loss', 'content': 0.2770352363586426, 'timestamp': '2025-10-01 04:13:17.847341', 'step': 513, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:13:17.890997', 'step': 513, 'epoch': 1} {'type': 'loss', 'content': 0.17281126976013184, 'timestamp': '2025-10-01 04:13:17.892970', 'step': 514, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:13:17.927536', 'step': 514, 'epoch': 1} {'type': 'loss', 'content': 0.20881688594818115, 'timestamp': '2025-10-01 04:13:17.929437', 'step': 515, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:13:17.959856', 'step': 515, 'epoch': 1} {'type': 'loss', 'content': 0.20688246190547943, 'timestamp': '2025-10-01 04:13:17.983404', 'step': 516, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:18.014419', 'step': 516, 'epoch': 1} {'type': 'loss', 'content': 0.22102388739585876, 'timestamp': '2025-10-01 04:13:18.016460', 'step': 517, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:18.047913', 'step': 517, 'epoch': 1} {'type': 'loss', 'content': 0.13182923197746277, 'timestamp': '2025-10-01 04:13:18.049740', 'step': 518, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:18.081291', 'step': 518, 'epoch': 1} {'type': 'loss', 'content': 0.2171369045972824, 'timestamp': '2025-10-01 04:13:18.083124', 'step': 519, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:13:18.113637', 'step': 519, 'epoch': 1} {'type': 'loss', 'content': 0.17317931354045868, 'timestamp': '2025-10-01 04:13:18.137668', 'step': 520, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:13:18.169807', 'step': 520, 'epoch': 1} {'type': 'loss', 'content': 0.1363762468099594, 'timestamp': '2025-10-01 04:13:18.171734', 'step': 521, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:18.205421', 'step': 521, 'epoch': 1} {'type': 'loss', 'content': 0.3021180331707001, 'timestamp': '2025-10-01 04:13:18.207480', 'step': 522, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:18.238549', 'step': 522, 'epoch': 1} {'type': 'loss', 'content': 0.19520261883735657, 'timestamp': '2025-10-01 04:13:18.240421', 'step': 523, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:18.271423', 'step': 523, 'epoch': 1} {'type': 'loss', 'content': 0.19147683680057526, 'timestamp': '2025-10-01 04:13:18.294832', 'step': 524, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:18.326899', 'step': 524, 'epoch': 1} {'type': 'loss', 'content': 0.09856101125478745, 'timestamp': '2025-10-01 04:13:18.328853', 'step': 525, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:13:18.360621', 'step': 525, 'epoch': 1} {'type': 'loss', 'content': 0.26281794905662537, 'timestamp': '2025-10-01 04:13:18.362961', 'step': 526, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:18.393259', 'step': 526, 'epoch': 1} {'type': 'loss', 'content': 0.17819170653820038, 'timestamp': '2025-10-01 04:13:18.395131', 'step': 527, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:13:18.431779', 'step': 527, 'epoch': 1} {'type': 'loss', 'content': 0.17001283168792725, 'timestamp': '2025-10-01 04:13:18.457273', 'step': 528, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:13:18.488174', 'step': 528, 'epoch': 1} {'type': 'loss', 'content': 0.1529104858636856, 'timestamp': '2025-10-01 04:13:18.490101', 'step': 529, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:18.521074', 'step': 529, 'epoch': 1} {'type': 'loss', 'content': 0.2055167555809021, 'timestamp': '2025-10-01 04:13:18.522998', 'step': 530, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:13:18.556858', 'step': 530, 'epoch': 1} {'type': 'loss', 'content': 0.14530202746391296, 'timestamp': '2025-10-01 04:13:18.559035', 'step': 531, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:18.591476', 'step': 531, 'epoch': 1} {'type': 'loss', 'content': 0.14561663568019867, 'timestamp': '2025-10-01 04:13:18.615089', 'step': 532, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-10-01 04:13:18.667881', 'step': 532, 'epoch': 1} {'type': 'loss', 'content': 0.15405581891536713, 'timestamp': '2025-10-01 04:13:18.669936', 'step': 533, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:13:18.703679', 'step': 533, 'epoch': 1} {'type': 'loss', 'content': 0.2188505083322525, 'timestamp': '2025-10-01 04:13:18.708103', 'step': 534, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:18.743355', 'step': 534, 'epoch': 1} {'type': 'loss', 'content': 0.3681154251098633, 'timestamp': '2025-10-01 04:13:18.745224', 'step': 535, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:13:18.780686', 'step': 535, 'epoch': 1} {'type': 'loss', 'content': 0.17665061354637146, 'timestamp': '2025-10-01 04:13:18.804110', 'step': 536, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:18.837552', 'step': 536, 'epoch': 1} {'type': 'loss', 'content': 0.12320395559072495, 'timestamp': '2025-10-01 04:13:18.839448', 'step': 537, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:18.885679', 'step': 537, 'epoch': 1} {'type': 'loss', 'content': 0.09814219921827316, 'timestamp': '2025-10-01 04:13:18.887551', 'step': 538, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:18.929162', 'step': 538, 'epoch': 1} {'type': 'loss', 'content': 0.2264297604560852, 'timestamp': '2025-10-01 04:13:18.931047', 'step': 539, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:18.984681', 'step': 539, 'epoch': 1} {'type': 'loss', 'content': 0.2024783194065094, 'timestamp': '2025-10-01 04:13:19.008158', 'step': 540, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:19.068515', 'step': 540, 'epoch': 1} {'type': 'loss', 'content': 0.19181358814239502, 'timestamp': '2025-10-01 04:13:19.070484', 'step': 541, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:19.114790', 'step': 541, 'epoch': 1} {'type': 'loss', 'content': 0.17831382155418396, 'timestamp': '2025-10-01 04:13:19.116551', 'step': 542, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:19.161263', 'step': 542, 'epoch': 1} {'type': 'loss', 'content': 0.13556766510009766, 'timestamp': '2025-10-01 04:13:19.163465', 'step': 543, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:13:19.197134', 'step': 543, 'epoch': 1} {'type': 'loss', 'content': 0.23750342428684235, 'timestamp': '2025-10-01 04:13:19.220522', 'step': 544, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:19.252681', 'step': 544, 'epoch': 1} {'type': 'loss', 'content': 0.18367168307304382, 'timestamp': '2025-10-01 04:13:19.254645', 'step': 545, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:19.288262', 'step': 545, 'epoch': 1} {'type': 'loss', 'content': 0.23020724952220917, 'timestamp': '2025-10-01 04:13:19.290395', 'step': 546, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:19.334236', 'step': 546, 'epoch': 1} {'type': 'loss', 'content': 0.1724819839000702, 'timestamp': '2025-10-01 04:13:19.336314', 'step': 547, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:13:19.368710', 'step': 547, 'epoch': 1} {'type': 'loss', 'content': 0.2702726423740387, 'timestamp': '2025-10-01 04:13:19.398509', 'step': 548, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:19.433247', 'step': 548, 'epoch': 1} {'type': 'loss', 'content': 0.29931384325027466, 'timestamp': '2025-10-01 04:13:19.434978', 'step': 549, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:19.478270', 'step': 549, 'epoch': 1} {'type': 'loss', 'content': 0.10747827589511871, 'timestamp': '2025-10-01 04:13:19.480184', 'step': 550, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:19.521148', 'step': 550, 'epoch': 1} {'type': 'loss', 'content': 0.19658885896205902, 'timestamp': '2025-10-01 04:13:19.523930', 'step': 551, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:19.562792', 'step': 551, 'epoch': 1} {'type': 'loss', 'content': 0.22574959695339203, 'timestamp': '2025-10-01 04:13:19.586171', 'step': 552, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:19.617438', 'step': 552, 'epoch': 1} {'type': 'loss', 'content': 0.1936832219362259, 'timestamp': '2025-10-01 04:13:19.619501', 'step': 553, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:19.661395', 'step': 553, 'epoch': 1} {'type': 'loss', 'content': 0.20879416167736053, 'timestamp': '2025-10-01 04:13:19.663353', 'step': 554, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:19.709100', 'step': 554, 'epoch': 1} {'type': 'loss', 'content': 0.15769946575164795, 'timestamp': '2025-10-01 04:13:19.710992', 'step': 555, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:19.747393', 'step': 555, 'epoch': 1} {'type': 'loss', 'content': 0.15040484070777893, 'timestamp': '2025-10-01 04:13:19.770905', 'step': 556, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:19.802266', 'step': 556, 'epoch': 1} {'type': 'loss', 'content': 0.1383841633796692, 'timestamp': '2025-10-01 04:13:19.804130', 'step': 557, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:13:19.840999', 'step': 557, 'epoch': 1} {'type': 'loss', 'content': 0.09969167411327362, 'timestamp': '2025-10-01 04:13:19.842926', 'step': 558, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:19.898737', 'step': 558, 'epoch': 1} {'type': 'loss', 'content': 0.18218132853507996, 'timestamp': '2025-10-01 04:13:19.900580', 'step': 559, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:13:19.932375', 'step': 559, 'epoch': 1} {'type': 'loss', 'content': 0.279839426279068, 'timestamp': '2025-10-01 04:13:19.955933', 'step': 560, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:19.994862', 'step': 560, 'epoch': 1} {'type': 'loss', 'content': 0.1731099784374237, 'timestamp': '2025-10-01 04:13:19.996708', 'step': 561, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:20.027445', 'step': 561, 'epoch': 1} {'type': 'loss', 'content': 0.12571173906326294, 'timestamp': '2025-10-01 04:13:20.029550', 'step': 562, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:13:20.061618', 'step': 562, 'epoch': 1} {'type': 'loss', 'content': 0.1840372234582901, 'timestamp': '2025-10-01 04:13:20.063994', 'step': 563, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:20.103469', 'step': 563, 'epoch': 1} {'type': 'loss', 'content': 0.220825657248497, 'timestamp': '2025-10-01 04:13:20.127326', 'step': 564, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-10-01 04:13:20.162058', 'step': 564, 'epoch': 1} {'type': 'loss', 'content': 0.2800828218460083, 'timestamp': '2025-10-01 04:13:20.164492', 'step': 565, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:20.196984', 'step': 565, 'epoch': 1} {'type': 'loss', 'content': 0.21447084844112396, 'timestamp': '2025-10-01 04:13:20.199156', 'step': 566, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:20.237498', 'step': 566, 'epoch': 1} {'type': 'loss', 'content': 0.30697283148765564, 'timestamp': '2025-10-01 04:13:20.239452', 'step': 567, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:13:20.271434', 'step': 567, 'epoch': 1} {'type': 'loss', 'content': 0.16291247308254242, 'timestamp': '2025-10-01 04:13:20.295187', 'step': 568, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:20.328320', 'step': 568, 'epoch': 1} {'type': 'loss', 'content': 0.2510395050048828, 'timestamp': '2025-10-01 04:13:20.330554', 'step': 569, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:20.363688', 'step': 569, 'epoch': 1} {'type': 'loss', 'content': 0.15356452763080597, 'timestamp': '2025-10-01 04:13:20.365628', 'step': 570, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:20.398295', 'step': 570, 'epoch': 1} {'type': 'loss', 'content': 0.18800018727779388, 'timestamp': '2025-10-01 04:13:20.400443', 'step': 571, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:20.439632', 'step': 571, 'epoch': 1} {'type': 'loss', 'content': 0.18474829196929932, 'timestamp': '2025-10-01 04:13:20.463576', 'step': 572, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:20.496109', 'step': 572, 'epoch': 1} {'type': 'loss', 'content': 0.13451239466667175, 'timestamp': '2025-10-01 04:13:20.498109', 'step': 573, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:13:20.530698', 'step': 573, 'epoch': 1} {'type': 'loss', 'content': 0.32201123237609863, 'timestamp': '2025-10-01 04:13:20.533184', 'step': 574, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:20.566678', 'step': 574, 'epoch': 1} {'type': 'loss', 'content': 0.13548679649829865, 'timestamp': '2025-10-01 04:13:20.568889', 'step': 575, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:13:20.609336', 'step': 575, 'epoch': 1} {'type': 'loss', 'content': 0.1809399425983429, 'timestamp': '2025-10-01 04:13:20.633104', 'step': 576, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:20.665463', 'step': 576, 'epoch': 1} {'type': 'loss', 'content': 0.09361544996500015, 'timestamp': '2025-10-01 04:13:20.667667', 'step': 577, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:13:20.703659', 'step': 577, 'epoch': 1} {'type': 'loss', 'content': 0.23906412720680237, 'timestamp': '2025-10-01 04:13:20.705778', 'step': 578, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:20.753810', 'step': 578, 'epoch': 1} {'type': 'loss', 'content': 0.28149133920669556, 'timestamp': '2025-10-01 04:13:20.755795', 'step': 579, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:13:20.794947', 'step': 579, 'epoch': 1} {'type': 'loss', 'content': 0.17403177917003632, 'timestamp': '2025-10-01 04:13:20.818607', 'step': 580, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:13:20.856363', 'step': 580, 'epoch': 1} {'type': 'loss', 'content': 0.1498078554868698, 'timestamp': '2025-10-01 04:13:20.858340', 'step': 581, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:13:20.889917', 'step': 581, 'epoch': 1} {'type': 'loss', 'content': 0.10935946553945541, 'timestamp': '2025-10-01 04:13:20.891905', 'step': 582, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:20.927659', 'step': 582, 'epoch': 1} {'type': 'loss', 'content': 0.18592007458209991, 'timestamp': '2025-10-01 04:13:20.929759', 'step': 583, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:20.964664', 'step': 583, 'epoch': 1} {'type': 'loss', 'content': 0.19507472217082977, 'timestamp': '2025-10-01 04:13:20.988348', 'step': 584, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:13:21.020006', 'step': 584, 'epoch': 1} {'type': 'loss', 'content': 0.16399618983268738, 'timestamp': '2025-10-01 04:13:21.022661', 'step': 585, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-10-01 04:13:21.061004', 'step': 585, 'epoch': 1} {'type': 'loss', 'content': 0.12157953530550003, 'timestamp': '2025-10-01 04:13:21.065423', 'step': 586, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:13:21.100169', 'step': 586, 'epoch': 1} {'type': 'loss', 'content': 0.15140949189662933, 'timestamp': '2025-10-01 04:13:21.102311', 'step': 587, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:21.134931', 'step': 587, 'epoch': 1} {'type': 'loss', 'content': 0.18224284052848816, 'timestamp': '2025-10-01 04:13:21.158720', 'step': 588, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:21.197499', 'step': 588, 'epoch': 1} {'type': 'loss', 'content': 0.25554731488227844, 'timestamp': '2025-10-01 04:13:21.199816', 'step': 589, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:21.232805', 'step': 589, 'epoch': 1} {'type': 'loss', 'content': 0.18913023173809052, 'timestamp': '2025-10-01 04:13:21.235094', 'step': 590, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:13:21.271556', 'step': 590, 'epoch': 1} {'type': 'loss', 'content': 0.18933594226837158, 'timestamp': '2025-10-01 04:13:21.274358', 'step': 591, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-10-01 04:13:21.306354', 'step': 591, 'epoch': 1} {'type': 'loss', 'content': 0.1860291212797165, 'timestamp': '2025-10-01 04:13:21.334705', 'step': 592, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:21.367245', 'step': 592, 'epoch': 1} {'type': 'loss', 'content': 0.21243701875209808, 'timestamp': '2025-10-01 04:13:21.369416', 'step': 593, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:13:21.401638', 'step': 593, 'epoch': 1} {'type': 'loss', 'content': 0.13829924166202545, 'timestamp': '2025-10-01 04:13:21.403850', 'step': 594, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:21.435621', 'step': 594, 'epoch': 1} {'type': 'loss', 'content': 0.15976138412952423, 'timestamp': '2025-10-01 04:13:21.437543', 'step': 595, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:21.469012', 'step': 595, 'epoch': 1} {'type': 'loss', 'content': 0.3543362617492676, 'timestamp': '2025-10-01 04:13:21.492374', 'step': 596, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:13:21.524693', 'step': 596, 'epoch': 1} {'type': 'loss', 'content': 0.1901368647813797, 'timestamp': '2025-10-01 04:13:21.526756', 'step': 597, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:21.561349', 'step': 597, 'epoch': 1} {'type': 'loss', 'content': 0.21813538670539856, 'timestamp': '2025-10-01 04:13:21.563971', 'step': 598, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:13:21.596653', 'step': 598, 'epoch': 1} {'type': 'loss', 'content': 0.21268384158611298, 'timestamp': '2025-10-01 04:13:21.598579', 'step': 599, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:13:21.630372', 'step': 599, 'epoch': 1} {'type': 'loss', 'content': 0.2167573720216751, 'timestamp': '2025-10-01 04:13:21.654050', 'step': 600, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:21.686829', 'step': 600, 'epoch': 1} {'type': 'loss', 'content': 0.15679682791233063, 'timestamp': '2025-10-01 04:13:21.688739', 'step': 601, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:13:21.723773', 'step': 601, 'epoch': 1} {'type': 'loss', 'content': 0.29456278681755066, 'timestamp': '2025-10-01 04:13:21.726215', 'step': 602, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:21.759690', 'step': 602, 'epoch': 1} {'type': 'loss', 'content': 0.2439243644475937, 'timestamp': '2025-10-01 04:13:21.761752', 'step': 603, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:21.792681', 'step': 603, 'epoch': 1} {'type': 'loss', 'content': 0.1960311084985733, 'timestamp': '2025-10-01 04:13:21.824399', 'step': 604, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:13:21.864039', 'step': 604, 'epoch': 1} {'type': 'loss', 'content': 0.1559300422668457, 'timestamp': '2025-10-01 04:13:21.866246', 'step': 605, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:21.899209', 'step': 605, 'epoch': 1} {'type': 'loss', 'content': 0.15839622914791107, 'timestamp': '2025-10-01 04:13:21.901114', 'step': 606, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:21.932188', 'step': 606, 'epoch': 1} {'type': 'loss', 'content': 0.09240008890628815, 'timestamp': '2025-10-01 04:13:21.934068', 'step': 607, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:21.965191', 'step': 607, 'epoch': 1} {'type': 'loss', 'content': 0.15911686420440674, 'timestamp': '2025-10-01 04:13:21.988538', 'step': 608, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:22.023338', 'step': 608, 'epoch': 1} {'type': 'loss', 'content': 0.10418505221605301, 'timestamp': '2025-10-01 04:13:22.029779', 'step': 609, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:13:22.062452', 'step': 609, 'epoch': 1} {'type': 'loss', 'content': 0.20908518135547638, 'timestamp': '2025-10-01 04:13:22.064554', 'step': 610, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:13:22.101932', 'step': 610, 'epoch': 1} {'type': 'loss', 'content': 0.23217767477035522, 'timestamp': '2025-10-01 04:13:22.107757', 'step': 611, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:13:22.142696', 'step': 611, 'epoch': 1} {'type': 'loss', 'content': 0.23730814456939697, 'timestamp': '2025-10-01 04:13:22.167798', 'step': 612, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:22.199008', 'step': 612, 'epoch': 1} {'type': 'loss', 'content': 0.14686860144138336, 'timestamp': '2025-10-01 04:13:22.200877', 'step': 613, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:22.235444', 'step': 613, 'epoch': 1} {'type': 'loss', 'content': 0.20299910008907318, 'timestamp': '2025-10-01 04:13:22.237389', 'step': 614, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:13:22.268821', 'step': 614, 'epoch': 1} {'type': 'loss', 'content': 0.2325689196586609, 'timestamp': '2025-10-01 04:13:22.270741', 'step': 615, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:22.304603', 'step': 615, 'epoch': 1} {'type': 'loss', 'content': 0.17401796579360962, 'timestamp': '2025-10-01 04:13:22.328033', 'step': 616, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:13:22.362683', 'step': 616, 'epoch': 1} {'type': 'loss', 'content': 0.22040624916553497, 'timestamp': '2025-10-01 04:13:22.364638', 'step': 617, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:22.399511', 'step': 617, 'epoch': 1} {'type': 'loss', 'content': 0.25295960903167725, 'timestamp': '2025-10-01 04:13:22.401440', 'step': 618, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:13:22.431675', 'step': 618, 'epoch': 1} {'type': 'loss', 'content': 0.14134159684181213, 'timestamp': '2025-10-01 04:13:22.436191', 'step': 619, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:13:22.467728', 'step': 619, 'epoch': 1} {'type': 'loss', 'content': 0.26581472158432007, 'timestamp': '2025-10-01 04:13:22.491216', 'step': 620, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:13:22.530902', 'step': 620, 'epoch': 1} {'type': 'loss', 'content': 0.2231028825044632, 'timestamp': '2025-10-01 04:13:22.532855', 'step': 621, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:13:22.563851', 'step': 621, 'epoch': 1} {'type': 'loss', 'content': 0.24935786426067352, 'timestamp': '2025-10-01 04:13:22.566576', 'step': 622, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:22.599844', 'step': 622, 'epoch': 1} {'type': 'loss', 'content': 0.26171156764030457, 'timestamp': '2025-10-01 04:13:22.601789', 'step': 623, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:22.642306', 'step': 623, 'epoch': 1} {'type': 'loss', 'content': 0.19690310955047607, 'timestamp': '2025-10-01 04:13:22.665913', 'step': 624, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:22.697820', 'step': 624, 'epoch': 1} {'type': 'loss', 'content': 0.15647636353969574, 'timestamp': '2025-10-01 04:13:22.700116', 'step': 625, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:22.734243', 'step': 625, 'epoch': 1} {'type': 'loss', 'content': 0.2068011313676834, 'timestamp': '2025-10-01 04:13:22.736530', 'step': 626, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:22.775802', 'step': 626, 'epoch': 1} {'type': 'loss', 'content': 0.2796107828617096, 'timestamp': '2025-10-01 04:13:22.777743', 'step': 627, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:13:22.817303', 'step': 627, 'epoch': 1} {'type': 'loss', 'content': 0.1568278670310974, 'timestamp': '2025-10-01 04:13:22.840737', 'step': 628, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:13:22.877940', 'step': 628, 'epoch': 1} {'type': 'loss', 'content': 0.2419370412826538, 'timestamp': '2025-10-01 04:13:22.880637', 'step': 629, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:13:22.919683', 'step': 629, 'epoch': 1} {'type': 'loss', 'content': 0.1498260498046875, 'timestamp': '2025-10-01 04:13:22.923529', 'step': 630, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:22.955786', 'step': 630, 'epoch': 1} {'type': 'loss', 'content': 0.1638302356004715, 'timestamp': '2025-10-01 04:13:22.958998', 'step': 631, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:23.000589', 'step': 631, 'epoch': 1} {'type': 'loss', 'content': 0.19904442131519318, 'timestamp': '2025-10-01 04:13:23.024353', 'step': 632, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:13:23.068068', 'step': 632, 'epoch': 1} {'type': 'loss', 'content': 0.14990730583667755, 'timestamp': '2025-10-01 04:13:23.070124', 'step': 633, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:13:23.105922', 'step': 633, 'epoch': 1} {'type': 'loss', 'content': 0.21625003218650818, 'timestamp': '2025-10-01 04:13:23.107967', 'step': 634, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:13:23.146050', 'step': 634, 'epoch': 1} {'type': 'loss', 'content': 0.1414519101381302, 'timestamp': '2025-10-01 04:13:23.148484', 'step': 635, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:23.187674', 'step': 635, 'epoch': 1} {'type': 'loss', 'content': 0.19145239889621735, 'timestamp': '2025-10-01 04:13:23.211502', 'step': 636, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:13:23.248268', 'step': 636, 'epoch': 1} {'type': 'loss', 'content': 0.1844811886548996, 'timestamp': '2025-10-01 04:13:23.250182', 'step': 637, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:23.284209', 'step': 637, 'epoch': 1} {'type': 'loss', 'content': 0.22681719064712524, 'timestamp': '2025-10-01 04:13:23.286121', 'step': 638, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:23.317015', 'step': 638, 'epoch': 1} {'type': 'loss', 'content': 0.19177743792533875, 'timestamp': '2025-10-01 04:13:23.318771', 'step': 639, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:13:23.356711', 'step': 639, 'epoch': 1} {'type': 'loss', 'content': 0.16610153019428253, 'timestamp': '2025-10-01 04:13:23.380122', 'step': 640, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:23.411607', 'step': 640, 'epoch': 1} {'type': 'loss', 'content': 0.19204531610012054, 'timestamp': '2025-10-01 04:13:23.413558', 'step': 641, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:23.448790', 'step': 641, 'epoch': 1} {'type': 'loss', 'content': 0.1813422590494156, 'timestamp': '2025-10-01 04:13:23.451530', 'step': 642, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:23.495429', 'step': 642, 'epoch': 1} {'type': 'loss', 'content': 0.2294466346502304, 'timestamp': '2025-10-01 04:13:23.504146', 'step': 643, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:23.538096', 'step': 643, 'epoch': 1} {'type': 'loss', 'content': 0.18119630217552185, 'timestamp': '2025-10-01 04:13:23.562377', 'step': 644, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:13:23.594226', 'step': 644, 'epoch': 1} {'type': 'loss', 'content': 0.2099386751651764, 'timestamp': '2025-10-01 04:13:23.596104', 'step': 645, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:23.633574', 'step': 645, 'epoch': 1} {'type': 'loss', 'content': 0.17863021790981293, 'timestamp': '2025-10-01 04:13:23.635506', 'step': 646, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:13:23.667026', 'step': 646, 'epoch': 1} {'type': 'loss', 'content': 0.21596390008926392, 'timestamp': '2025-10-01 04:13:23.668992', 'step': 647, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:13:23.707799', 'step': 647, 'epoch': 1} {'type': 'loss', 'content': 0.254085898399353, 'timestamp': '2025-10-01 04:13:23.731462', 'step': 648, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:23.763071', 'step': 648, 'epoch': 1} {'type': 'loss', 'content': 0.1975928395986557, 'timestamp': '2025-10-01 04:13:23.765113', 'step': 649, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:13:23.798624', 'step': 649, 'epoch': 1} {'type': 'loss', 'content': 0.16115792095661163, 'timestamp': '2025-10-01 04:13:23.800639', 'step': 650, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:13:23.837620', 'step': 650, 'epoch': 1} {'type': 'loss', 'content': 0.31605368852615356, 'timestamp': '2025-10-01 04:13:23.840433', 'step': 651, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:23.872250', 'step': 651, 'epoch': 1} {'type': 'loss', 'content': 0.10036371648311615, 'timestamp': '2025-10-01 04:13:23.895772', 'step': 652, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:13:23.933122', 'step': 652, 'epoch': 1} {'type': 'loss', 'content': 0.19662363827228546, 'timestamp': '2025-10-01 04:13:23.935064', 'step': 653, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:23.977681', 'step': 653, 'epoch': 1} {'type': 'loss', 'content': 0.20461618900299072, 'timestamp': '2025-10-01 04:13:23.979599', 'step': 654, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:13:24.011828', 'step': 654, 'epoch': 1} {'type': 'loss', 'content': 0.21328425407409668, 'timestamp': '2025-10-01 04:13:24.014240', 'step': 655, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:24.058518', 'step': 655, 'epoch': 1} {'type': 'loss', 'content': 0.1046108677983284, 'timestamp': '2025-10-01 04:13:24.081887', 'step': 656, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:24.112932', 'step': 656, 'epoch': 1} {'type': 'loss', 'content': 0.14959383010864258, 'timestamp': '2025-10-01 04:13:24.114876', 'step': 657, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:24.149659', 'step': 657, 'epoch': 1} {'type': 'loss', 'content': 0.1554035246372223, 'timestamp': '2025-10-01 04:13:24.151579', 'step': 658, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:24.183979', 'step': 658, 'epoch': 1} {'type': 'loss', 'content': 0.19701366126537323, 'timestamp': '2025-10-01 04:13:24.186019', 'step': 659, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:24.222568', 'step': 659, 'epoch': 1} {'type': 'loss', 'content': 0.14998197555541992, 'timestamp': '2025-10-01 04:13:24.245977', 'step': 660, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:24.277811', 'step': 660, 'epoch': 1} {'type': 'loss', 'content': 0.18131983280181885, 'timestamp': '2025-10-01 04:13:24.279890', 'step': 661, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:24.317129', 'step': 661, 'epoch': 1} {'type': 'loss', 'content': 0.23804232478141785, 'timestamp': '2025-10-01 04:13:24.319072', 'step': 662, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:24.350028', 'step': 662, 'epoch': 1} {'type': 'loss', 'content': 0.28362318873405457, 'timestamp': '2025-10-01 04:13:24.352101', 'step': 663, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:24.383590', 'step': 663, 'epoch': 1} {'type': 'loss', 'content': 0.21842968463897705, 'timestamp': '2025-10-01 04:13:24.407158', 'step': 664, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:24.437675', 'step': 664, 'epoch': 1} {'type': 'loss', 'content': 0.23476895689964294, 'timestamp': '2025-10-01 04:13:24.439587', 'step': 665, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:13:24.474107', 'step': 665, 'epoch': 1} {'type': 'loss', 'content': 0.13273823261260986, 'timestamp': '2025-10-01 04:13:24.476040', 'step': 666, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:13:24.513782', 'step': 666, 'epoch': 1} {'type': 'loss', 'content': 0.2338760942220688, 'timestamp': '2025-10-01 04:13:24.516267', 'step': 667, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:24.552282', 'step': 667, 'epoch': 1} {'type': 'loss', 'content': 0.19874398410320282, 'timestamp': '2025-10-01 04:13:24.575699', 'step': 668, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:13:24.611188', 'step': 668, 'epoch': 1} {'type': 'loss', 'content': 0.38962432742118835, 'timestamp': '2025-10-01 04:13:24.613034', 'step': 669, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:13:24.652134', 'step': 669, 'epoch': 1} {'type': 'loss', 'content': 0.2465783655643463, 'timestamp': '2025-10-01 04:13:24.656564', 'step': 670, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:24.693451', 'step': 670, 'epoch': 1} {'type': 'loss', 'content': 0.17686815559864044, 'timestamp': '2025-10-01 04:13:24.695587', 'step': 671, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:13:24.732636', 'step': 671, 'epoch': 1} {'type': 'loss', 'content': 0.18391726911067963, 'timestamp': '2025-10-01 04:13:24.756027', 'step': 672, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:24.788039', 'step': 672, 'epoch': 1} {'type': 'loss', 'content': 0.1853116750717163, 'timestamp': '2025-10-01 04:13:24.790032', 'step': 673, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:24.830861', 'step': 673, 'epoch': 1} {'type': 'loss', 'content': 0.19576525688171387, 'timestamp': '2025-10-01 04:13:24.832904', 'step': 674, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:24.864064', 'step': 674, 'epoch': 1} {'type': 'loss', 'content': 0.17962129414081573, 'timestamp': '2025-10-01 04:13:24.866021', 'step': 675, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:13:24.897699', 'step': 675, 'epoch': 1} {'type': 'loss', 'content': 0.1790211945772171, 'timestamp': '2025-10-01 04:13:24.921075', 'step': 676, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:24.952284', 'step': 676, 'epoch': 1} {'type': 'loss', 'content': 0.15425421297550201, 'timestamp': '2025-10-01 04:13:24.954191', 'step': 677, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:24.986289', 'step': 677, 'epoch': 1} {'type': 'loss', 'content': 0.3379635512828827, 'timestamp': '2025-10-01 04:13:24.988417', 'step': 678, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:13:25.019093', 'step': 678, 'epoch': 1} {'type': 'loss', 'content': 0.17415957152843475, 'timestamp': '2025-10-01 04:13:25.021204', 'step': 679, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:13:25.062190', 'step': 679, 'epoch': 1} {'type': 'loss', 'content': 0.19740547239780426, 'timestamp': '2025-10-01 04:13:25.085637', 'step': 680, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:25.120330', 'step': 680, 'epoch': 1} {'type': 'loss', 'content': 0.18271026015281677, 'timestamp': '2025-10-01 04:13:25.122273', 'step': 681, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:13:25.158644', 'step': 681, 'epoch': 1} {'type': 'loss', 'content': 0.19026175141334534, 'timestamp': '2025-10-01 04:13:25.160536', 'step': 682, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:25.195791', 'step': 682, 'epoch': 1} {'type': 'loss', 'content': 0.18575692176818848, 'timestamp': '2025-10-01 04:13:25.197907', 'step': 683, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:25.227992', 'step': 683, 'epoch': 1} {'type': 'loss', 'content': 0.14799518883228302, 'timestamp': '2025-10-01 04:13:25.251337', 'step': 684, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:25.284588', 'step': 684, 'epoch': 1} {'type': 'loss', 'content': 0.20331774652004242, 'timestamp': '2025-10-01 04:13:25.286424', 'step': 685, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:25.316786', 'step': 685, 'epoch': 1} {'type': 'loss', 'content': 0.20827513933181763, 'timestamp': '2025-10-01 04:13:25.318742', 'step': 686, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:25.350603', 'step': 686, 'epoch': 1} {'type': 'loss', 'content': 0.1925032138824463, 'timestamp': '2025-10-01 04:13:25.352579', 'step': 687, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:13:25.385737', 'step': 687, 'epoch': 1} {'type': 'loss', 'content': 0.14536310732364655, 'timestamp': '2025-10-01 04:13:25.409618', 'step': 688, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:25.441214', 'step': 688, 'epoch': 1} {'type': 'loss', 'content': 0.20664779841899872, 'timestamp': '2025-10-01 04:13:25.443264', 'step': 689, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:13:25.476692', 'step': 689, 'epoch': 1} {'type': 'loss', 'content': 0.24515791237354279, 'timestamp': '2025-10-01 04:13:25.479079', 'step': 690, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:25.515034', 'step': 690, 'epoch': 1} {'type': 'loss', 'content': 0.17344938218593597, 'timestamp': '2025-10-01 04:13:25.517033', 'step': 691, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:25.553208', 'step': 691, 'epoch': 1} {'type': 'loss', 'content': 0.20729456841945648, 'timestamp': '2025-10-01 04:13:25.576813', 'step': 692, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:25.614263', 'step': 692, 'epoch': 1} {'type': 'loss', 'content': 0.21811369061470032, 'timestamp': '2025-10-01 04:13:25.616294', 'step': 693, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:25.649270', 'step': 693, 'epoch': 1} {'type': 'loss', 'content': 0.2863270938396454, 'timestamp': '2025-10-01 04:13:25.651225', 'step': 694, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:13:25.686358', 'step': 694, 'epoch': 1} {'type': 'loss', 'content': 0.25972306728363037, 'timestamp': '2025-10-01 04:13:25.688378', 'step': 695, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:13:25.719410', 'step': 695, 'epoch': 1} {'type': 'loss', 'content': 0.15666168928146362, 'timestamp': '2025-10-01 04:13:25.748594', 'step': 696, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:25.785330', 'step': 696, 'epoch': 1} {'type': 'loss', 'content': 0.21780180931091309, 'timestamp': '2025-10-01 04:13:25.787503', 'step': 697, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:13:25.821976', 'step': 697, 'epoch': 1} {'type': 'loss', 'content': 0.20028771460056305, 'timestamp': '2025-10-01 04:13:25.824420', 'step': 698, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:25.855785', 'step': 698, 'epoch': 1} {'type': 'loss', 'content': 0.22753483057022095, 'timestamp': '2025-10-01 04:13:25.865126', 'step': 699, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:25.898973', 'step': 699, 'epoch': 1} {'type': 'loss', 'content': 0.09673946350812912, 'timestamp': '2025-10-01 04:13:25.922597', 'step': 700, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:25.959534', 'step': 700, 'epoch': 1} {'type': 'loss', 'content': 0.23011347651481628, 'timestamp': '2025-10-01 04:13:25.962245', 'step': 701, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:13:25.993601', 'step': 701, 'epoch': 1} {'type': 'loss', 'content': 0.1259775459766388, 'timestamp': '2025-10-01 04:13:25.995506', 'step': 702, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:26.027445', 'step': 702, 'epoch': 1} {'type': 'loss', 'content': 0.20154467225074768, 'timestamp': '2025-10-01 04:13:26.029314', 'step': 703, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:13:26.061232', 'step': 703, 'epoch': 1} {'type': 'loss', 'content': 0.1895882934331894, 'timestamp': '2025-10-01 04:13:26.084602', 'step': 704, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:13:26.115378', 'step': 704, 'epoch': 1} {'type': 'loss', 'content': 0.22949758172035217, 'timestamp': '2025-10-01 04:13:26.117266', 'step': 705, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:26.152301', 'step': 705, 'epoch': 1} {'type': 'loss', 'content': 0.23370951414108276, 'timestamp': '2025-10-01 04:13:26.154190', 'step': 706, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:13:26.187286', 'step': 706, 'epoch': 1} {'type': 'loss', 'content': 0.2679346203804016, 'timestamp': '2025-10-01 04:13:26.189717', 'step': 707, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:26.230878', 'step': 707, 'epoch': 1} {'type': 'loss', 'content': 0.18372933566570282, 'timestamp': '2025-10-01 04:13:26.256033', 'step': 708, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:13:26.296515', 'step': 708, 'epoch': 1} {'type': 'loss', 'content': 0.14451470971107483, 'timestamp': '2025-10-01 04:13:26.298475', 'step': 709, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:26.331035', 'step': 709, 'epoch': 1} {'type': 'loss', 'content': 0.18564443290233612, 'timestamp': '2025-10-01 04:13:26.342851', 'step': 710, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:26.375953', 'step': 710, 'epoch': 1} {'type': 'loss', 'content': 0.2063123732805252, 'timestamp': '2025-10-01 04:13:26.377868', 'step': 711, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:26.408862', 'step': 711, 'epoch': 1} {'type': 'loss', 'content': 0.14810287952423096, 'timestamp': '2025-10-01 04:13:26.432336', 'step': 712, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:13:26.466405', 'step': 712, 'epoch': 1} {'type': 'loss', 'content': 0.20554181933403015, 'timestamp': '2025-10-01 04:13:26.468311', 'step': 713, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:26.498147', 'step': 713, 'epoch': 1} {'type': 'loss', 'content': 0.18496064841747284, 'timestamp': '2025-10-01 04:13:26.499998', 'step': 714, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:26.534290', 'step': 714, 'epoch': 1} {'type': 'loss', 'content': 0.16779200732707977, 'timestamp': '2025-10-01 04:13:26.536332', 'step': 715, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:13:26.574599', 'step': 715, 'epoch': 1} {'type': 'loss', 'content': 0.19441518187522888, 'timestamp': '2025-10-01 04:13:26.597941', 'step': 716, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:13:26.629593', 'step': 716, 'epoch': 1} {'type': 'loss', 'content': 0.13884416222572327, 'timestamp': '2025-10-01 04:13:26.631748', 'step': 717, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:26.668793', 'step': 717, 'epoch': 1} {'type': 'loss', 'content': 0.23957504332065582, 'timestamp': '2025-10-01 04:13:26.670774', 'step': 718, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:26.701383', 'step': 718, 'epoch': 1} {'type': 'loss', 'content': 0.22579483687877655, 'timestamp': '2025-10-01 04:13:26.703516', 'step': 719, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:26.734262', 'step': 719, 'epoch': 1} {'type': 'loss', 'content': 0.13682033121585846, 'timestamp': '2025-10-01 04:13:26.757653', 'step': 720, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:26.794817', 'step': 720, 'epoch': 1} {'type': 'loss', 'content': 0.12059203535318375, 'timestamp': '2025-10-01 04:13:26.796786', 'step': 721, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:26.832172', 'step': 721, 'epoch': 1} {'type': 'loss', 'content': 0.2011057287454605, 'timestamp': '2025-10-01 04:13:26.834143', 'step': 722, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:26.868814', 'step': 722, 'epoch': 1} {'type': 'loss', 'content': 0.21679934859275818, 'timestamp': '2025-10-01 04:13:26.870895', 'step': 723, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:13:26.907970', 'step': 723, 'epoch': 1} {'type': 'loss', 'content': 0.0632939413189888, 'timestamp': '2025-10-01 04:13:26.931373', 'step': 724, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:26.965851', 'step': 724, 'epoch': 1} {'type': 'loss', 'content': 0.2168097198009491, 'timestamp': '2025-10-01 04:13:26.968381', 'step': 725, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:27.008742', 'step': 725, 'epoch': 1} {'type': 'loss', 'content': 0.2214609831571579, 'timestamp': '2025-10-01 04:13:27.010650', 'step': 726, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:27.046594', 'step': 726, 'epoch': 1} {'type': 'loss', 'content': 0.14415553212165833, 'timestamp': '2025-10-01 04:13:27.048476', 'step': 727, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:27.080448', 'step': 727, 'epoch': 1} {'type': 'loss', 'content': 0.18563009798526764, 'timestamp': '2025-10-01 04:13:27.104023', 'step': 728, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:27.136199', 'step': 728, 'epoch': 1} {'type': 'loss', 'content': 0.18476377427577972, 'timestamp': '2025-10-01 04:13:27.138168', 'step': 729, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:13:27.172058', 'step': 729, 'epoch': 1} {'type': 'loss', 'content': 0.1750229299068451, 'timestamp': '2025-10-01 04:13:27.174006', 'step': 730, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:27.204086', 'step': 730, 'epoch': 1} {'type': 'loss', 'content': 0.23032698035240173, 'timestamp': '2025-10-01 04:13:27.206629', 'step': 731, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:13:27.240382', 'step': 731, 'epoch': 1} {'type': 'loss', 'content': 0.146841898560524, 'timestamp': '2025-10-01 04:13:27.263926', 'step': 732, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:27.298941', 'step': 732, 'epoch': 1} {'type': 'loss', 'content': 0.2165866494178772, 'timestamp': '2025-10-01 04:13:27.300782', 'step': 733, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:13:27.331881', 'step': 733, 'epoch': 1} {'type': 'loss', 'content': 0.22045645117759705, 'timestamp': '2025-10-01 04:13:27.333514', 'step': 734, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:27.364869', 'step': 734, 'epoch': 1} {'type': 'loss', 'content': 0.29953762888908386, 'timestamp': '2025-10-01 04:13:27.366938', 'step': 735, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:13:27.415114', 'step': 735, 'epoch': 1} {'type': 'loss', 'content': 0.2064448744058609, 'timestamp': '2025-10-01 04:13:27.438706', 'step': 736, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:13:27.486778', 'step': 736, 'epoch': 1} {'type': 'loss', 'content': 0.17959268391132355, 'timestamp': '2025-10-01 04:13:27.488844', 'step': 737, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:27.524028', 'step': 737, 'epoch': 1} {'type': 'loss', 'content': 0.14577338099479675, 'timestamp': '2025-10-01 04:13:27.525741', 'step': 738, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:13:27.560060', 'step': 738, 'epoch': 1} {'type': 'loss', 'content': 0.15744486451148987, 'timestamp': '2025-10-01 04:13:27.561963', 'step': 739, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:27.595396', 'step': 739, 'epoch': 1} {'type': 'loss', 'content': 0.11111482232809067, 'timestamp': '2025-10-01 04:13:27.618667', 'step': 740, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:27.649785', 'step': 740, 'epoch': 1} {'type': 'loss', 'content': 0.1685817539691925, 'timestamp': '2025-10-01 04:13:27.651540', 'step': 741, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:27.683636', 'step': 741, 'epoch': 1} {'type': 'loss', 'content': 0.1685725301504135, 'timestamp': '2025-10-01 04:13:27.685647', 'step': 742, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:27.716942', 'step': 742, 'epoch': 1} {'type': 'loss', 'content': 0.24692289531230927, 'timestamp': '2025-10-01 04:13:27.719116', 'step': 743, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:13:27.749400', 'step': 743, 'epoch': 1} {'type': 'loss', 'content': 0.19252169132232666, 'timestamp': '2025-10-01 04:13:27.773077', 'step': 744, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:27.803781', 'step': 744, 'epoch': 1} {'type': 'loss', 'content': 0.15055784583091736, 'timestamp': '2025-10-01 04:13:27.807168', 'step': 745, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:27.840203', 'step': 745, 'epoch': 1} {'type': 'loss', 'content': 0.20281943678855896, 'timestamp': '2025-10-01 04:13:27.842400', 'step': 746, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:13:27.874738', 'step': 746, 'epoch': 1} {'type': 'loss', 'content': 0.20521755516529083, 'timestamp': '2025-10-01 04:13:27.877254', 'step': 747, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:27.915778', 'step': 747, 'epoch': 1} {'type': 'loss', 'content': 0.22660301625728607, 'timestamp': '2025-10-01 04:13:27.939869', 'step': 748, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:13:27.977888', 'step': 748, 'epoch': 1} {'type': 'loss', 'content': 0.21845491230487823, 'timestamp': '2025-10-01 04:13:27.980017', 'step': 749, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:28.017269', 'step': 749, 'epoch': 1} {'type': 'loss', 'content': 0.2659546434879303, 'timestamp': '2025-10-01 04:13:28.019164', 'step': 750, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:28.050309', 'step': 750, 'epoch': 1} {'type': 'loss', 'content': 0.24771443009376526, 'timestamp': '2025-10-01 04:13:28.052290', 'step': 751, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:28.084555', 'step': 751, 'epoch': 1} {'type': 'loss', 'content': 0.2699565887451172, 'timestamp': '2025-10-01 04:13:28.108068', 'step': 752, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:28.142133', 'step': 752, 'epoch': 1} {'type': 'loss', 'content': 0.18945293128490448, 'timestamp': '2025-10-01 04:13:28.144005', 'step': 753, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:28.178852', 'step': 753, 'epoch': 1} {'type': 'loss', 'content': 0.22888235747814178, 'timestamp': '2025-10-01 04:13:28.180570', 'step': 754, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:13:28.211044', 'step': 754, 'epoch': 1} {'type': 'loss', 'content': 0.17588765919208527, 'timestamp': '2025-10-01 04:13:28.213005', 'step': 755, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:13:28.244680', 'step': 755, 'epoch': 1} {'type': 'loss', 'content': 0.16179847717285156, 'timestamp': '2025-10-01 04:13:28.268058', 'step': 756, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:13:28.304075', 'step': 756, 'epoch': 1} {'type': 'loss', 'content': 0.16516205668449402, 'timestamp': '2025-10-01 04:13:28.307217', 'step': 757, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:28.342330', 'step': 757, 'epoch': 1} {'type': 'loss', 'content': 0.34444189071655273, 'timestamp': '2025-10-01 04:13:28.344267', 'step': 758, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:28.381802', 'step': 758, 'epoch': 1} {'type': 'loss', 'content': 0.13632790744304657, 'timestamp': '2025-10-01 04:13:28.383782', 'step': 759, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:28.415504', 'step': 759, 'epoch': 1} {'type': 'loss', 'content': 0.1440207064151764, 'timestamp': '2025-10-01 04:13:28.439057', 'step': 760, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:28.477169', 'step': 760, 'epoch': 1} {'type': 'loss', 'content': 0.14027570188045502, 'timestamp': '2025-10-01 04:13:28.479105', 'step': 761, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:28.512156', 'step': 761, 'epoch': 1} {'type': 'loss', 'content': 0.14688220620155334, 'timestamp': '2025-10-01 04:13:28.514153', 'step': 762, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:28.545925', 'step': 762, 'epoch': 1} {'type': 'loss', 'content': 0.30585238337516785, 'timestamp': '2025-10-01 04:13:28.552207', 'step': 763, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:28.590362', 'step': 763, 'epoch': 1} {'type': 'loss', 'content': 0.1637611985206604, 'timestamp': '2025-10-01 04:13:28.620214', 'step': 764, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:13:28.662805', 'step': 764, 'epoch': 1} {'type': 'loss', 'content': 0.3153875172138214, 'timestamp': '2025-10-01 04:13:28.664844', 'step': 765, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:28.701908', 'step': 765, 'epoch': 1} {'type': 'loss', 'content': 0.26143893599510193, 'timestamp': '2025-10-01 04:13:28.703811', 'step': 766, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:13:28.737607', 'step': 766, 'epoch': 1} {'type': 'loss', 'content': 0.1255941092967987, 'timestamp': '2025-10-01 04:13:28.739760', 'step': 767, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:28.781629', 'step': 767, 'epoch': 1} {'type': 'loss', 'content': 0.1939428299665451, 'timestamp': '2025-10-01 04:13:28.805804', 'step': 768, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:28.849095', 'step': 768, 'epoch': 1} {'type': 'loss', 'content': 0.23349791765213013, 'timestamp': '2025-10-01 04:13:28.851087', 'step': 769, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:28.888413', 'step': 769, 'epoch': 1} {'type': 'loss', 'content': 0.16560600697994232, 'timestamp': '2025-10-01 04:13:28.890344', 'step': 770, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:13:28.922018', 'step': 770, 'epoch': 1} {'type': 'loss', 'content': 0.14954477548599243, 'timestamp': '2025-10-01 04:13:28.923896', 'step': 771, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:13:28.954272', 'step': 771, 'epoch': 1} {'type': 'loss', 'content': 0.08560175448656082, 'timestamp': '2025-10-01 04:13:28.977716', 'step': 772, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:29.011313', 'step': 772, 'epoch': 1} {'type': 'loss', 'content': 0.18537823855876923, 'timestamp': '2025-10-01 04:13:29.013237', 'step': 773, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:13:29.043845', 'step': 773, 'epoch': 1} {'type': 'loss', 'content': 0.20945237576961517, 'timestamp': '2025-10-01 04:13:29.045751', 'step': 774, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:13:29.078801', 'step': 774, 'epoch': 1} {'type': 'loss', 'content': 0.12023024260997772, 'timestamp': '2025-10-01 04:13:29.081264', 'step': 775, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:29.110882', 'step': 775, 'epoch': 1} {'type': 'loss', 'content': 0.2532610297203064, 'timestamp': '2025-10-01 04:13:29.134304', 'step': 776, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:29.163999', 'step': 776, 'epoch': 1} {'type': 'loss', 'content': 0.22996950149536133, 'timestamp': '2025-10-01 04:13:29.166119', 'step': 777, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:13:29.195105', 'step': 777, 'epoch': 1} {'type': 'loss', 'content': 0.09590543061494827, 'timestamp': '2025-10-01 04:13:29.196978', 'step': 778, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:29.227030', 'step': 778, 'epoch': 1} {'type': 'loss', 'content': 0.24690182507038116, 'timestamp': '2025-10-01 04:13:29.228996', 'step': 779, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:29.259161', 'step': 779, 'epoch': 1} {'type': 'loss', 'content': 0.2422400712966919, 'timestamp': '2025-10-01 04:13:29.282763', 'step': 780, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:13:29.314126', 'step': 780, 'epoch': 1} {'type': 'loss', 'content': 0.12851582467556, 'timestamp': '2025-10-01 04:13:29.316127', 'step': 781, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:13:29.357767', 'step': 781, 'epoch': 1} {'type': 'loss', 'content': 0.16155017912387848, 'timestamp': '2025-10-01 04:13:29.360265', 'step': 782, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:29.401221', 'step': 782, 'epoch': 1} {'type': 'loss', 'content': 0.23241394758224487, 'timestamp': '2025-10-01 04:13:29.403135', 'step': 783, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:13:29.436576', 'step': 783, 'epoch': 1} {'type': 'loss', 'content': 0.2284482717514038, 'timestamp': '2025-10-01 04:13:29.460567', 'step': 784, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:29.496986', 'step': 784, 'epoch': 1} {'type': 'loss', 'content': 0.2199501097202301, 'timestamp': '2025-10-01 04:13:29.499029', 'step': 785, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:29.531291', 'step': 785, 'epoch': 1} {'type': 'loss', 'content': 0.29646846652030945, 'timestamp': '2025-10-01 04:13:29.533405', 'step': 786, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:13:29.565936', 'step': 786, 'epoch': 1} {'type': 'loss', 'content': 0.2219458967447281, 'timestamp': '2025-10-01 04:13:29.567835', 'step': 787, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:13:29.598847', 'step': 787, 'epoch': 1} {'type': 'loss', 'content': 0.13267835974693298, 'timestamp': '2025-10-01 04:13:29.622372', 'step': 788, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:29.654372', 'step': 788, 'epoch': 1} {'type': 'loss', 'content': 0.16148623824119568, 'timestamp': '2025-10-01 04:13:29.656283', 'step': 789, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:13:29.697828', 'step': 789, 'epoch': 1} {'type': 'loss', 'content': 0.2948680818080902, 'timestamp': '2025-10-01 04:13:29.700331', 'step': 790, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:29.733527', 'step': 790, 'epoch': 1} {'type': 'loss', 'content': 0.20046789944171906, 'timestamp': '2025-10-01 04:13:29.735431', 'step': 791, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:29.768253', 'step': 791, 'epoch': 1} {'type': 'loss', 'content': 0.17732924222946167, 'timestamp': '2025-10-01 04:13:29.791673', 'step': 792, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:29.824561', 'step': 792, 'epoch': 1} {'type': 'loss', 'content': 0.14681023359298706, 'timestamp': '2025-10-01 04:13:29.826406', 'step': 793, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:29.860504', 'step': 793, 'epoch': 1} {'type': 'loss', 'content': 0.2895553708076477, 'timestamp': '2025-10-01 04:13:29.862440', 'step': 794, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:13:29.898913', 'step': 794, 'epoch': 1} {'type': 'loss', 'content': 0.15119698643684387, 'timestamp': '2025-10-01 04:13:29.901014', 'step': 795, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:29.935049', 'step': 795, 'epoch': 1} {'type': 'loss', 'content': 0.16114398837089539, 'timestamp': '2025-10-01 04:13:29.958644', 'step': 796, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:29.989909', 'step': 796, 'epoch': 1} {'type': 'loss', 'content': 0.092211052775383, 'timestamp': '2025-10-01 04:13:29.991755', 'step': 797, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:30.025264', 'step': 797, 'epoch': 1} {'type': 'loss', 'content': 0.11737655103206635, 'timestamp': '2025-10-01 04:13:30.027268', 'step': 798, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:13:30.066264', 'step': 798, 'epoch': 1} {'type': 'loss', 'content': 0.23472866415977478, 'timestamp': '2025-10-01 04:13:30.068142', 'step': 799, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:30.105586', 'step': 799, 'epoch': 1} {'type': 'loss', 'content': 0.2878880500793457, 'timestamp': '2025-10-01 04:13:30.128960', 'step': 800, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:30.166104', 'step': 800, 'epoch': 1} {'type': 'loss', 'content': 0.20382539927959442, 'timestamp': '2025-10-01 04:13:30.167996', 'step': 801, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:13:30.203212', 'step': 801, 'epoch': 1} {'type': 'loss', 'content': 0.13354529440402985, 'timestamp': '2025-10-01 04:13:30.205130', 'step': 802, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:30.237348', 'step': 802, 'epoch': 1} {'type': 'loss', 'content': 0.2343875914812088, 'timestamp': '2025-10-01 04:13:30.239328', 'step': 803, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:13:30.269744', 'step': 803, 'epoch': 1} {'type': 'loss', 'content': 0.18166351318359375, 'timestamp': '2025-10-01 04:13:30.293252', 'step': 804, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:30.328415', 'step': 804, 'epoch': 1} {'type': 'loss', 'content': 0.1292002648115158, 'timestamp': '2025-10-01 04:13:30.330360', 'step': 805, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:30.363621', 'step': 805, 'epoch': 1} {'type': 'loss', 'content': 0.2130240648984909, 'timestamp': '2025-10-01 04:13:30.366557', 'step': 806, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:13:30.403343', 'step': 806, 'epoch': 1} {'type': 'loss', 'content': 0.2868455946445465, 'timestamp': '2025-10-01 04:13:30.405733', 'step': 807, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:13:30.436303', 'step': 807, 'epoch': 1} {'type': 'loss', 'content': 0.19971734285354614, 'timestamp': '2025-10-01 04:13:30.459764', 'step': 808, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:30.494769', 'step': 808, 'epoch': 1} {'type': 'loss', 'content': 0.2205328345298767, 'timestamp': '2025-10-01 04:13:30.496778', 'step': 809, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:30.532286', 'step': 809, 'epoch': 1} {'type': 'loss', 'content': 0.19084744155406952, 'timestamp': '2025-10-01 04:13:30.534502', 'step': 810, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:30.572515', 'step': 810, 'epoch': 1} {'type': 'loss', 'content': 0.23938578367233276, 'timestamp': '2025-10-01 04:13:30.574692', 'step': 811, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:13:30.610966', 'step': 811, 'epoch': 1} {'type': 'loss', 'content': 0.17098744213581085, 'timestamp': '2025-10-01 04:13:30.634390', 'step': 812, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:30.668055', 'step': 812, 'epoch': 1} {'type': 'loss', 'content': 0.13602252304553986, 'timestamp': '2025-10-01 04:13:30.670090', 'step': 813, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:30.705266', 'step': 813, 'epoch': 1} {'type': 'loss', 'content': 0.18381452560424805, 'timestamp': '2025-10-01 04:13:30.707754', 'step': 814, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:13:30.743048', 'step': 814, 'epoch': 1} {'type': 'loss', 'content': 0.18733501434326172, 'timestamp': '2025-10-01 04:13:30.747428', 'step': 815, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:30.782018', 'step': 815, 'epoch': 1} {'type': 'loss', 'content': 0.18503046035766602, 'timestamp': '2025-10-01 04:13:30.805571', 'step': 816, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:13:30.837789', 'step': 816, 'epoch': 1} {'type': 'loss', 'content': 0.18810991942882538, 'timestamp': '2025-10-01 04:13:30.839926', 'step': 817, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:30.872510', 'step': 817, 'epoch': 1} {'type': 'loss', 'content': 0.19313347339630127, 'timestamp': '2025-10-01 04:13:30.874894', 'step': 818, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:13:30.906563', 'step': 818, 'epoch': 1} {'type': 'loss', 'content': 0.18241195380687714, 'timestamp': '2025-10-01 04:13:30.908984', 'step': 819, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:30.940481', 'step': 819, 'epoch': 1} {'type': 'loss', 'content': 0.1840258240699768, 'timestamp': '2025-10-01 04:13:30.963910', 'step': 820, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:13:30.997274', 'step': 820, 'epoch': 1} {'type': 'loss', 'content': 0.2725401818752289, 'timestamp': '2025-10-01 04:13:30.999136', 'step': 821, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:31.036528', 'step': 821, 'epoch': 1} {'type': 'loss', 'content': 0.16511058807373047, 'timestamp': '2025-10-01 04:13:31.038823', 'step': 822, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:31.070075', 'step': 822, 'epoch': 1} {'type': 'loss', 'content': 0.14795486629009247, 'timestamp': '2025-10-01 04:13:31.071996', 'step': 823, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:31.110716', 'step': 823, 'epoch': 1} {'type': 'loss', 'content': 0.14387041330337524, 'timestamp': '2025-10-01 04:13:31.135242', 'step': 824, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:13:31.169888', 'step': 824, 'epoch': 1} {'type': 'loss', 'content': 0.25211212038993835, 'timestamp': '2025-10-01 04:13:31.172063', 'step': 825, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:31.208425', 'step': 825, 'epoch': 1} {'type': 'loss', 'content': 0.14528405666351318, 'timestamp': '2025-10-01 04:13:31.210915', 'step': 826, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:31.247059', 'step': 826, 'epoch': 1} {'type': 'loss', 'content': 0.30473408102989197, 'timestamp': '2025-10-01 04:13:31.249009', 'step': 827, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:31.288478', 'step': 827, 'epoch': 1} {'type': 'loss', 'content': 0.24058455228805542, 'timestamp': '2025-10-01 04:13:31.311882', 'step': 828, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:13:31.355770', 'step': 828, 'epoch': 1} {'type': 'loss', 'content': 0.137437641620636, 'timestamp': '2025-10-01 04:13:31.358118', 'step': 829, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:13:31.390858', 'step': 829, 'epoch': 1} {'type': 'loss', 'content': 0.14201441407203674, 'timestamp': '2025-10-01 04:13:31.392897', 'step': 830, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:31.430894', 'step': 830, 'epoch': 1} {'type': 'loss', 'content': 0.35870295763015747, 'timestamp': '2025-10-01 04:13:31.433089', 'step': 831, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:31.464303', 'step': 831, 'epoch': 1} {'type': 'loss', 'content': 0.18439073860645294, 'timestamp': '2025-10-01 04:13:31.488366', 'step': 832, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:31.525124', 'step': 832, 'epoch': 1} {'type': 'loss', 'content': 0.1276065856218338, 'timestamp': '2025-10-01 04:13:31.527742', 'step': 833, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:13:31.559548', 'step': 833, 'epoch': 1} {'type': 'loss', 'content': 0.15129077434539795, 'timestamp': '2025-10-01 04:13:31.561378', 'step': 834, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:13:31.594955', 'step': 834, 'epoch': 1} {'type': 'loss', 'content': 0.1450529396533966, 'timestamp': '2025-10-01 04:13:31.597398', 'step': 835, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:13:31.630128', 'step': 835, 'epoch': 1} {'type': 'loss', 'content': 0.15675775706768036, 'timestamp': '2025-10-01 04:13:31.653552', 'step': 836, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:31.687435', 'step': 836, 'epoch': 1} {'type': 'loss', 'content': 0.17872591316699982, 'timestamp': '2025-10-01 04:13:31.689371', 'step': 837, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:13:31.720876', 'step': 837, 'epoch': 1} {'type': 'loss', 'content': 0.1990274041891098, 'timestamp': '2025-10-01 04:13:31.723261', 'step': 838, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:13:31.754611', 'step': 838, 'epoch': 1} {'type': 'loss', 'content': 0.2975572943687439, 'timestamp': '2025-10-01 04:13:31.756793', 'step': 839, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:31.788503', 'step': 839, 'epoch': 1} {'type': 'loss', 'content': 0.18245619535446167, 'timestamp': '2025-10-01 04:13:31.819494', 'step': 840, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:13:31.850500', 'step': 840, 'epoch': 1} {'type': 'loss', 'content': 0.1760142594575882, 'timestamp': '2025-10-01 04:13:31.852988', 'step': 841, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:31.885747', 'step': 841, 'epoch': 1} {'type': 'loss', 'content': 0.10407473146915436, 'timestamp': '2025-10-01 04:13:31.895058', 'step': 842, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:31.930912', 'step': 842, 'epoch': 1} {'type': 'loss', 'content': 0.24481278657913208, 'timestamp': '2025-10-01 04:13:31.933803', 'step': 843, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:31.966956', 'step': 843, 'epoch': 1} {'type': 'loss', 'content': 0.24196770787239075, 'timestamp': '2025-10-01 04:13:31.991119', 'step': 844, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:13:32.024189', 'step': 844, 'epoch': 1} {'type': 'loss', 'content': 0.19831442832946777, 'timestamp': '2025-10-01 04:13:32.026212', 'step': 845, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:13:32.058869', 'step': 845, 'epoch': 1} {'type': 'loss', 'content': 0.2273898869752884, 'timestamp': '2025-10-01 04:13:32.061369', 'step': 846, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:32.092584', 'step': 846, 'epoch': 1} {'type': 'loss', 'content': 0.17928224802017212, 'timestamp': '2025-10-01 04:13:32.094590', 'step': 847, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:13:32.125085', 'step': 847, 'epoch': 1} {'type': 'loss', 'content': 0.23502294719219208, 'timestamp': '2025-10-01 04:13:32.148830', 'step': 848, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:32.178821', 'step': 848, 'epoch': 1} {'type': 'loss', 'content': 0.14804986119270325, 'timestamp': '2025-10-01 04:13:32.180778', 'step': 849, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:13:32.212662', 'step': 849, 'epoch': 1} {'type': 'loss', 'content': 0.17251244187355042, 'timestamp': '2025-10-01 04:13:32.215298', 'step': 850, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:32.250925', 'step': 850, 'epoch': 1} {'type': 'loss', 'content': 0.23328766226768494, 'timestamp': '2025-10-01 04:13:32.253408', 'step': 851, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:32.286148', 'step': 851, 'epoch': 1} {'type': 'loss', 'content': 0.15424945950508118, 'timestamp': '2025-10-01 04:13:32.309791', 'step': 852, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:32.341368', 'step': 852, 'epoch': 1} {'type': 'loss', 'content': 0.16665299236774445, 'timestamp': '2025-10-01 04:13:32.343306', 'step': 853, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:13:32.381509', 'step': 853, 'epoch': 1} {'type': 'loss', 'content': 0.18380951881408691, 'timestamp': '2025-10-01 04:13:32.383944', 'step': 854, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:32.417639', 'step': 854, 'epoch': 1} {'type': 'loss', 'content': 0.17468546330928802, 'timestamp': '2025-10-01 04:13:32.419725', 'step': 855, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:32.449646', 'step': 855, 'epoch': 1} {'type': 'loss', 'content': 0.157766193151474, 'timestamp': '2025-10-01 04:13:32.472888', 'step': 856, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:32.502296', 'step': 856, 'epoch': 1} {'type': 'loss', 'content': 0.2905932366847992, 'timestamp': '2025-10-01 04:13:32.505388', 'step': 857, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:13:32.537150', 'step': 857, 'epoch': 1} {'type': 'loss', 'content': 0.2153184860944748, 'timestamp': '2025-10-01 04:13:32.539063', 'step': 858, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:32.569802', 'step': 858, 'epoch': 1} {'type': 'loss', 'content': 0.1275782734155655, 'timestamp': '2025-10-01 04:13:32.571946', 'step': 859, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:13:32.609234', 'step': 859, 'epoch': 1} {'type': 'loss', 'content': 0.1840095818042755, 'timestamp': '2025-10-01 04:13:32.632654', 'step': 860, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:13:32.662138', 'step': 860, 'epoch': 1} {'type': 'loss', 'content': 0.14304201304912567, 'timestamp': '2025-10-01 04:13:32.664052', 'step': 861, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:32.693697', 'step': 861, 'epoch': 1} {'type': 'loss', 'content': 0.07485344260931015, 'timestamp': '2025-10-01 04:13:32.703361', 'step': 862, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:13:32.733097', 'step': 862, 'epoch': 1} {'type': 'loss', 'content': 0.27260422706604004, 'timestamp': '2025-10-01 04:13:32.735655', 'step': 863, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:13:32.765279', 'step': 863, 'epoch': 1} {'type': 'loss', 'content': 0.14609192311763763, 'timestamp': '2025-10-01 04:13:32.788639', 'step': 864, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:32.819034', 'step': 864, 'epoch': 1} {'type': 'loss', 'content': 0.1905221939086914, 'timestamp': '2025-10-01 04:13:32.821008', 'step': 865, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:32.851101', 'step': 865, 'epoch': 1} {'type': 'loss', 'content': 0.22133396565914154, 'timestamp': '2025-10-01 04:13:32.853011', 'step': 866, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:13:32.882014', 'step': 866, 'epoch': 1} {'type': 'loss', 'content': 0.18789532780647278, 'timestamp': '2025-10-01 04:13:32.884509', 'step': 867, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:32.914164', 'step': 867, 'epoch': 1} {'type': 'loss', 'content': 0.2270176112651825, 'timestamp': '2025-10-01 04:13:32.937728', 'step': 868, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:13:32.975949', 'step': 868, 'epoch': 1} {'type': 'loss', 'content': 0.12713047862052917, 'timestamp': '2025-10-01 04:13:32.978620', 'step': 869, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:33.008089', 'step': 869, 'epoch': 1} {'type': 'loss', 'content': 0.21919073164463043, 'timestamp': '2025-10-01 04:13:33.009990', 'step': 870, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:33.039687', 'step': 870, 'epoch': 1} {'type': 'loss', 'content': 0.10280489921569824, 'timestamp': '2025-10-01 04:13:33.042144', 'step': 871, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:13:33.073294', 'step': 871, 'epoch': 1} {'type': 'loss', 'content': 0.15635333955287933, 'timestamp': '2025-10-01 04:13:33.097860', 'step': 872, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:13:33.128623', 'step': 872, 'epoch': 1} {'type': 'loss', 'content': 0.2256186455488205, 'timestamp': '2025-10-01 04:13:33.130649', 'step': 873, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:33.164623', 'step': 873, 'epoch': 1} {'type': 'loss', 'content': 0.24899078905582428, 'timestamp': '2025-10-01 04:13:33.166572', 'step': 874, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:33.199681', 'step': 874, 'epoch': 1} {'type': 'loss', 'content': 0.2119935154914856, 'timestamp': '2025-10-01 04:13:33.201665', 'step': 875, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:33.232633', 'step': 875, 'epoch': 1} {'type': 'loss', 'content': 0.1737862527370453, 'timestamp': '2025-10-01 04:13:33.256114', 'step': 876, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:13:33.286540', 'step': 876, 'epoch': 1} {'type': 'loss', 'content': 0.178695946931839, 'timestamp': '2025-10-01 04:13:33.288518', 'step': 877, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:33.317822', 'step': 877, 'epoch': 1} {'type': 'loss', 'content': 0.16150839626789093, 'timestamp': '2025-10-01 04:13:33.319844', 'step': 878, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:33.350670', 'step': 878, 'epoch': 1} {'type': 'loss', 'content': 0.16168120503425598, 'timestamp': '2025-10-01 04:13:33.358134', 'step': 879, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:33.387868', 'step': 879, 'epoch': 1} {'type': 'loss', 'content': 0.14517872035503387, 'timestamp': '2025-10-01 04:13:33.411301', 'step': 880, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:13:33.440414', 'step': 880, 'epoch': 1} {'type': 'loss', 'content': 0.1463204026222229, 'timestamp': '2025-10-01 04:13:33.442388', 'step': 881, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:13:33.472066', 'step': 881, 'epoch': 1} {'type': 'loss', 'content': 0.23681971430778503, 'timestamp': '2025-10-01 04:13:33.473869', 'step': 882, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:13:33.503538', 'step': 882, 'epoch': 1} {'type': 'loss', 'content': 0.17259109020233154, 'timestamp': '2025-10-01 04:13:33.505840', 'step': 883, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:13:33.543033', 'step': 883, 'epoch': 1} {'type': 'loss', 'content': 0.20936846733093262, 'timestamp': '2025-10-01 04:13:33.577525', 'step': 884, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:33.607896', 'step': 884, 'epoch': 1} {'type': 'loss', 'content': 0.1647792011499405, 'timestamp': '2025-10-01 04:13:33.609971', 'step': 885, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:33.639117', 'step': 885, 'epoch': 1} {'type': 'loss', 'content': 0.34907105565071106, 'timestamp': '2025-10-01 04:13:33.641088', 'step': 886, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:13:33.670929', 'step': 886, 'epoch': 1} {'type': 'loss', 'content': 0.24083293974399567, 'timestamp': '2025-10-01 04:13:33.673887', 'step': 887, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:33.703798', 'step': 887, 'epoch': 1} {'type': 'loss', 'content': 0.27904149889945984, 'timestamp': '2025-10-01 04:13:33.727931', 'step': 888, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:33.757234', 'step': 888, 'epoch': 1} {'type': 'loss', 'content': 0.23478935658931732, 'timestamp': '2025-10-01 04:13:33.759160', 'step': 889, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:13:33.788681', 'step': 889, 'epoch': 1} {'type': 'loss', 'content': 0.12447044998407364, 'timestamp': '2025-10-01 04:13:33.791822', 'step': 890, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:33.823224', 'step': 890, 'epoch': 1} {'type': 'loss', 'content': 0.25544270873069763, 'timestamp': '2025-10-01 04:13:33.825244', 'step': 891, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:33.855778', 'step': 891, 'epoch': 1} {'type': 'loss', 'content': 0.1888897716999054, 'timestamp': '2025-10-01 04:13:33.880899', 'step': 892, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:13:33.910769', 'step': 892, 'epoch': 1} {'type': 'loss', 'content': 0.14248551428318024, 'timestamp': '2025-10-01 04:13:33.912722', 'step': 893, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:13:33.942421', 'step': 893, 'epoch': 1} {'type': 'loss', 'content': 0.19824214279651642, 'timestamp': '2025-10-01 04:13:33.944334', 'step': 894, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:33.974108', 'step': 894, 'epoch': 1} {'type': 'loss', 'content': 0.13008737564086914, 'timestamp': '2025-10-01 04:13:33.976103', 'step': 895, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:34.006181', 'step': 895, 'epoch': 1} {'type': 'loss', 'content': 0.20681001245975494, 'timestamp': '2025-10-01 04:13:34.029683', 'step': 896, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:13:34.061809', 'step': 896, 'epoch': 1} {'type': 'loss', 'content': 0.18619729578495026, 'timestamp': '2025-10-01 04:13:34.063685', 'step': 897, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:13:34.093213', 'step': 897, 'epoch': 1} {'type': 'loss', 'content': 0.16412487626075745, 'timestamp': '2025-10-01 04:13:34.095381', 'step': 898, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:13:34.125628', 'step': 898, 'epoch': 1} {'type': 'loss', 'content': 0.2393195629119873, 'timestamp': '2025-10-01 04:13:34.127694', 'step': 899, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:34.157622', 'step': 899, 'epoch': 1} {'type': 'loss', 'content': 0.37161386013031006, 'timestamp': '2025-10-01 04:13:34.185543', 'step': 900, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:13:34.217791', 'step': 900, 'epoch': 1} {'type': 'loss', 'content': 0.1463930308818817, 'timestamp': '2025-10-01 04:13:34.219835', 'step': 901, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:34.253108', 'step': 901, 'epoch': 1} {'type': 'loss', 'content': 0.1482975333929062, 'timestamp': '2025-10-01 04:13:34.255035', 'step': 902, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:13:34.284816', 'step': 902, 'epoch': 1} {'type': 'loss', 'content': 0.16220960021018982, 'timestamp': '2025-10-01 04:13:34.287191', 'step': 903, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:34.317344', 'step': 903, 'epoch': 1} {'type': 'loss', 'content': 0.20816980302333832, 'timestamp': '2025-10-01 04:13:34.340685', 'step': 904, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:13:34.370577', 'step': 904, 'epoch': 1} {'type': 'loss', 'content': 0.1753104329109192, 'timestamp': '2025-10-01 04:13:34.372598', 'step': 905, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:13:34.403765', 'step': 905, 'epoch': 1} {'type': 'loss', 'content': 0.21105073392391205, 'timestamp': '2025-10-01 04:13:34.406305', 'step': 906, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:34.437523', 'step': 906, 'epoch': 1} {'type': 'loss', 'content': 0.14002859592437744, 'timestamp': '2025-10-01 04:13:34.439362', 'step': 907, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:13:34.469715', 'step': 907, 'epoch': 1} {'type': 'loss', 'content': 0.21131007373332977, 'timestamp': '2025-10-01 04:13:34.494351', 'step': 908, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:13:34.531896', 'step': 908, 'epoch': 1} {'type': 'loss', 'content': 0.2753852903842926, 'timestamp': '2025-10-01 04:13:34.533899', 'step': 909, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:34.565618', 'step': 909, 'epoch': 1} {'type': 'loss', 'content': 0.1791331022977829, 'timestamp': '2025-10-01 04:13:34.567525', 'step': 910, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:34.596843', 'step': 910, 'epoch': 1} {'type': 'loss', 'content': 0.2147938758134842, 'timestamp': '2025-10-01 04:13:34.598681', 'step': 911, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:13:34.628238', 'step': 911, 'epoch': 1} {'type': 'loss', 'content': 0.22355319559574127, 'timestamp': '2025-10-01 04:13:34.651778', 'step': 912, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:34.686681', 'step': 912, 'epoch': 1} {'type': 'loss', 'content': 0.15339869260787964, 'timestamp': '2025-10-01 04:13:34.688817', 'step': 913, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:34.718542', 'step': 913, 'epoch': 1} {'type': 'loss', 'content': 0.08833153545856476, 'timestamp': '2025-10-01 04:13:34.721543', 'step': 914, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-10-01 04:13:34.751085', 'step': 914, 'epoch': 1} {'type': 'loss', 'content': 0.25892964005470276, 'timestamp': '2025-10-01 04:13:34.758458', 'step': 915, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:13:34.788685', 'step': 915, 'epoch': 1} {'type': 'loss', 'content': 0.18523277342319489, 'timestamp': '2025-10-01 04:13:34.812053', 'step': 916, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:34.842567', 'step': 916, 'epoch': 1} {'type': 'loss', 'content': 0.3073110580444336, 'timestamp': '2025-10-01 04:13:34.844477', 'step': 917, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:34.873742', 'step': 917, 'epoch': 1} {'type': 'loss', 'content': 0.16972582042217255, 'timestamp': '2025-10-01 04:13:34.875745', 'step': 918, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:34.910681', 'step': 918, 'epoch': 1} {'type': 'loss', 'content': 0.31232157349586487, 'timestamp': '2025-10-01 04:13:34.913529', 'step': 919, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:34.943225', 'step': 919, 'epoch': 1} {'type': 'loss', 'content': 0.11887139827013016, 'timestamp': '2025-10-01 04:13:34.972698', 'step': 920, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:35.002621', 'step': 920, 'epoch': 1} {'type': 'loss', 'content': 0.2404967099428177, 'timestamp': '2025-10-01 04:13:35.004493', 'step': 921, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:35.033612', 'step': 921, 'epoch': 1} {'type': 'loss', 'content': 0.1726158708333969, 'timestamp': '2025-10-01 04:13:35.035702', 'step': 922, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:13:35.065243', 'step': 922, 'epoch': 1} {'type': 'loss', 'content': 0.184897318482399, 'timestamp': '2025-10-01 04:13:35.075659', 'step': 923, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:13:35.105554', 'step': 923, 'epoch': 1} {'type': 'loss', 'content': 0.28083929419517517, 'timestamp': '2025-10-01 04:13:35.129090', 'step': 924, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:13:35.159054', 'step': 924, 'epoch': 1} {'type': 'loss', 'content': 0.15509723126888275, 'timestamp': '2025-10-01 04:13:35.161093', 'step': 925, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:35.190675', 'step': 925, 'epoch': 1} {'type': 'loss', 'content': 0.25594034790992737, 'timestamp': '2025-10-01 04:13:35.192643', 'step': 926, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:35.222282', 'step': 926, 'epoch': 1} {'type': 'loss', 'content': 0.27219533920288086, 'timestamp': '2025-10-01 04:13:35.224444', 'step': 927, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:13:35.254620', 'step': 927, 'epoch': 1} {'type': 'loss', 'content': 0.1716022938489914, 'timestamp': '2025-10-01 04:13:35.278183', 'step': 928, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:13:35.314542', 'step': 928, 'epoch': 1} {'type': 'loss', 'content': 0.14851027727127075, 'timestamp': '2025-10-01 04:13:35.324921', 'step': 929, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:13:35.355082', 'step': 929, 'epoch': 1} {'type': 'loss', 'content': 0.19076648354530334, 'timestamp': '2025-10-01 04:13:35.362222', 'step': 930, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:35.396295', 'step': 930, 'epoch': 1} {'type': 'loss', 'content': 0.15663614869117737, 'timestamp': '2025-10-01 04:13:35.398202', 'step': 931, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:13:35.446631', 'step': 931, 'epoch': 1} {'type': 'loss', 'content': 0.3295857608318329, 'timestamp': '2025-10-01 04:13:35.470007', 'step': 932, 'epoch': 1} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 949202279808}], 'timestamp': '2025-10-01 04:13:45.447919', 'step': 932, 'epoch': 1} {'type': 'pplx', 'content': 7734.8395379306885, 'timestamp': '2025-10-01 04:13:45.450739', 'step': 932, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:45.484881', 'step': 932, 'epoch': 1} {'type': 'loss', 'content': 0.11962587386369705, 'timestamp': '2025-10-01 04:13:45.487176', 'step': 933, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:45.521017', 'step': 933, 'epoch': 1} {'type': 'loss', 'content': 0.1466418355703354, 'timestamp': '2025-10-01 04:13:45.523238', 'step': 934, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:45.555242', 'step': 934, 'epoch': 1} {'type': 'loss', 'content': 0.2697294354438782, 'timestamp': '2025-10-01 04:13:45.557413', 'step': 935, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:45.592393', 'step': 935, 'epoch': 1} {'type': 'loss', 'content': 0.14460717141628265, 'timestamp': '2025-10-01 04:13:45.617015', 'step': 936, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:45.652812', 'step': 936, 'epoch': 1} {'type': 'loss', 'content': 0.2340499311685562, 'timestamp': '2025-10-01 04:13:45.655269', 'step': 937, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:13:45.687592', 'step': 937, 'epoch': 1} {'type': 'loss', 'content': 0.29364272952079773, 'timestamp': '2025-10-01 04:13:45.689598', 'step': 938, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:13:45.721752', 'step': 938, 'epoch': 1} {'type': 'loss', 'content': 0.22158312797546387, 'timestamp': '2025-10-01 04:13:45.726016', 'step': 939, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:45.765310', 'step': 939, 'epoch': 1} {'type': 'loss', 'content': 0.22151437401771545, 'timestamp': '2025-10-01 04:13:45.789153', 'step': 940, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:45.820756', 'step': 940, 'epoch': 1} {'type': 'loss', 'content': 0.15099848806858063, 'timestamp': '2025-10-01 04:13:45.822854', 'step': 941, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:45.861733', 'step': 941, 'epoch': 1} {'type': 'loss', 'content': 0.24977557361125946, 'timestamp': '2025-10-01 04:13:45.863785', 'step': 942, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:45.895561', 'step': 942, 'epoch': 1} {'type': 'loss', 'content': 0.2125953733921051, 'timestamp': '2025-10-01 04:13:45.897509', 'step': 943, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:45.938086', 'step': 943, 'epoch': 1} {'type': 'loss', 'content': 0.14487318694591522, 'timestamp': '2025-10-01 04:13:45.961337', 'step': 944, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:45.994146', 'step': 944, 'epoch': 1} {'type': 'loss', 'content': 0.18982063233852386, 'timestamp': '2025-10-01 04:13:45.996330', 'step': 945, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:46.029684', 'step': 945, 'epoch': 1} {'type': 'loss', 'content': 0.12267941981554031, 'timestamp': '2025-10-01 04:13:46.031696', 'step': 946, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:46.068226', 'step': 946, 'epoch': 1} {'type': 'loss', 'content': 0.25871360301971436, 'timestamp': '2025-10-01 04:13:46.073220', 'step': 947, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:46.104450', 'step': 947, 'epoch': 1} {'type': 'loss', 'content': 0.23776713013648987, 'timestamp': '2025-10-01 04:13:46.127776', 'step': 948, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:13:46.161093', 'step': 948, 'epoch': 1} {'type': 'loss', 'content': 0.20729435980319977, 'timestamp': '2025-10-01 04:13:46.163057', 'step': 949, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:46.199611', 'step': 949, 'epoch': 1} {'type': 'loss', 'content': 0.17206594347953796, 'timestamp': '2025-10-01 04:13:46.201658', 'step': 950, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:13:46.237294', 'step': 950, 'epoch': 1} {'type': 'loss', 'content': 0.22935304045677185, 'timestamp': '2025-10-01 04:13:46.239546', 'step': 951, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:13:46.277997', 'step': 951, 'epoch': 1} {'type': 'loss', 'content': 0.1920337975025177, 'timestamp': '2025-10-01 04:13:46.310010', 'step': 952, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:46.352854', 'step': 952, 'epoch': 1} {'type': 'loss', 'content': 0.11508174985647202, 'timestamp': '2025-10-01 04:13:46.354868', 'step': 953, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:13:46.385691', 'step': 953, 'epoch': 1} {'type': 'loss', 'content': 0.13519537448883057, 'timestamp': '2025-10-01 04:13:46.387613', 'step': 954, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:46.431517', 'step': 954, 'epoch': 1} {'type': 'loss', 'content': 0.15005402266979218, 'timestamp': '2025-10-01 04:13:46.434592', 'step': 955, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:46.473598', 'step': 955, 'epoch': 1} {'type': 'loss', 'content': 0.12855389714241028, 'timestamp': '2025-10-01 04:13:46.508717', 'step': 956, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:46.544628', 'step': 956, 'epoch': 1} {'type': 'loss', 'content': 0.20637835562229156, 'timestamp': '2025-10-01 04:13:46.547603', 'step': 957, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:13:46.578897', 'step': 957, 'epoch': 1} {'type': 'loss', 'content': 0.19211703538894653, 'timestamp': '2025-10-01 04:13:46.591847', 'step': 958, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:46.623939', 'step': 958, 'epoch': 1} {'type': 'loss', 'content': 0.24279989302158356, 'timestamp': '2025-10-01 04:13:46.625913', 'step': 959, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:46.666441', 'step': 959, 'epoch': 1} {'type': 'loss', 'content': 0.17298083007335663, 'timestamp': '2025-10-01 04:13:46.689894', 'step': 960, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:46.726206', 'step': 960, 'epoch': 1} {'type': 'loss', 'content': 0.1588839292526245, 'timestamp': '2025-10-01 04:13:46.729003', 'step': 961, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:46.767870', 'step': 961, 'epoch': 1} {'type': 'loss', 'content': 0.16993685066699982, 'timestamp': '2025-10-01 04:13:46.769922', 'step': 962, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:46.802103', 'step': 962, 'epoch': 1} {'type': 'loss', 'content': 0.18877920508384705, 'timestamp': '2025-10-01 04:13:46.804049', 'step': 963, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:46.835480', 'step': 963, 'epoch': 1} {'type': 'loss', 'content': 0.2456887811422348, 'timestamp': '2025-10-01 04:13:46.860003', 'step': 964, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:13:46.894271', 'step': 964, 'epoch': 1} {'type': 'loss', 'content': 0.20180419087409973, 'timestamp': '2025-10-01 04:13:46.896686', 'step': 965, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:46.939057', 'step': 965, 'epoch': 1} {'type': 'loss', 'content': 0.21579578518867493, 'timestamp': '2025-10-01 04:13:46.942445', 'step': 966, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:13:46.982819', 'step': 966, 'epoch': 1} {'type': 'loss', 'content': 0.2048819363117218, 'timestamp': '2025-10-01 04:13:46.989361', 'step': 967, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:13:47.021770', 'step': 967, 'epoch': 1} {'type': 'loss', 'content': 0.2540442645549774, 'timestamp': '2025-10-01 04:13:47.045151', 'step': 968, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:47.077481', 'step': 968, 'epoch': 1} {'type': 'loss', 'content': 0.1654854416847229, 'timestamp': '2025-10-01 04:13:47.079930', 'step': 969, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:13:47.114391', 'step': 969, 'epoch': 1} {'type': 'loss', 'content': 0.27100980281829834, 'timestamp': '2025-10-01 04:13:47.116338', 'step': 970, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:47.150572', 'step': 970, 'epoch': 1} {'type': 'loss', 'content': 0.15433521568775177, 'timestamp': '2025-10-01 04:13:47.152864', 'step': 971, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:13:47.199145', 'step': 971, 'epoch': 1} {'type': 'loss', 'content': 0.27757301926612854, 'timestamp': '2025-10-01 04:13:47.223845', 'step': 972, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:47.259227', 'step': 972, 'epoch': 1} {'type': 'loss', 'content': 0.19555670022964478, 'timestamp': '2025-10-01 04:13:47.261665', 'step': 973, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:47.294527', 'step': 973, 'epoch': 1} {'type': 'loss', 'content': 0.15485113859176636, 'timestamp': '2025-10-01 04:13:47.296547', 'step': 974, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:13:47.329815', 'step': 974, 'epoch': 1} {'type': 'loss', 'content': 0.242889404296875, 'timestamp': '2025-10-01 04:13:47.331898', 'step': 975, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:13:47.370913', 'step': 975, 'epoch': 1} {'type': 'loss', 'content': 0.17175278067588806, 'timestamp': '2025-10-01 04:13:47.394376', 'step': 976, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:13:47.431597', 'step': 976, 'epoch': 1} {'type': 'loss', 'content': 0.18092884123325348, 'timestamp': '2025-10-01 04:13:47.433350', 'step': 977, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:47.465216', 'step': 977, 'epoch': 1} {'type': 'loss', 'content': 0.21599730849266052, 'timestamp': '2025-10-01 04:13:47.467448', 'step': 978, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:47.499936', 'step': 978, 'epoch': 1} {'type': 'loss', 'content': 0.2327686995267868, 'timestamp': '2025-10-01 04:13:47.501694', 'step': 979, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:47.542316', 'step': 979, 'epoch': 1} {'type': 'loss', 'content': 0.18834228813648224, 'timestamp': '2025-10-01 04:13:47.566010', 'step': 980, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:47.598263', 'step': 980, 'epoch': 1} {'type': 'loss', 'content': 0.13161244988441467, 'timestamp': '2025-10-01 04:13:47.600665', 'step': 981, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:47.634109', 'step': 981, 'epoch': 1} {'type': 'loss', 'content': 0.13273285329341888, 'timestamp': '2025-10-01 04:13:47.636126', 'step': 982, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:47.668790', 'step': 982, 'epoch': 1} {'type': 'loss', 'content': 0.13642315566539764, 'timestamp': '2025-10-01 04:13:47.671406', 'step': 983, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:47.705803', 'step': 983, 'epoch': 1} {'type': 'loss', 'content': 0.32094889879226685, 'timestamp': '2025-10-01 04:13:47.729518', 'step': 984, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:13:47.763476', 'step': 984, 'epoch': 1} {'type': 'loss', 'content': 0.17879869043827057, 'timestamp': '2025-10-01 04:13:47.765375', 'step': 985, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:47.805177', 'step': 985, 'epoch': 1} {'type': 'loss', 'content': 0.08852309733629227, 'timestamp': '2025-10-01 04:13:47.808173', 'step': 986, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:13:47.850834', 'step': 986, 'epoch': 1} {'type': 'loss', 'content': 0.14291948080062866, 'timestamp': '2025-10-01 04:13:47.852636', 'step': 987, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:47.893059', 'step': 987, 'epoch': 1} {'type': 'loss', 'content': 0.24329052865505219, 'timestamp': '2025-10-01 04:13:47.916567', 'step': 988, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:47.949855', 'step': 988, 'epoch': 1} {'type': 'loss', 'content': 0.22609342634677887, 'timestamp': '2025-10-01 04:13:47.951744', 'step': 989, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:47.993955', 'step': 989, 'epoch': 1} {'type': 'loss', 'content': 0.16906023025512695, 'timestamp': '2025-10-01 04:13:47.995905', 'step': 990, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:48.038690', 'step': 990, 'epoch': 1} {'type': 'loss', 'content': 0.10923931002616882, 'timestamp': '2025-10-01 04:13:48.040634', 'step': 991, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:48.072787', 'step': 991, 'epoch': 1} {'type': 'loss', 'content': 0.18490317463874817, 'timestamp': '2025-10-01 04:13:48.096266', 'step': 992, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:13:48.130933', 'step': 992, 'epoch': 1} {'type': 'loss', 'content': 0.20228256285190582, 'timestamp': '2025-10-01 04:13:48.132895', 'step': 993, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:13:48.164447', 'step': 993, 'epoch': 1} {'type': 'loss', 'content': 0.24351467192173004, 'timestamp': '2025-10-01 04:13:48.166930', 'step': 994, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:48.204430', 'step': 994, 'epoch': 1} {'type': 'loss', 'content': 0.1573912352323532, 'timestamp': '2025-10-01 04:13:48.206312', 'step': 995, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:13:48.245029', 'step': 995, 'epoch': 1} {'type': 'loss', 'content': 0.22371192276477814, 'timestamp': '2025-10-01 04:13:48.268516', 'step': 996, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:13:48.305859', 'step': 996, 'epoch': 1} {'type': 'loss', 'content': 0.17009051144123077, 'timestamp': '2025-10-01 04:13:48.307900', 'step': 997, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:13:48.345711', 'step': 997, 'epoch': 1} {'type': 'loss', 'content': 0.19875691831111908, 'timestamp': '2025-10-01 04:13:48.347638', 'step': 998, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:48.379960', 'step': 998, 'epoch': 1} {'type': 'loss', 'content': 0.18161088228225708, 'timestamp': '2025-10-01 04:13:48.381903', 'step': 999, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:48.414908', 'step': 999, 'epoch': 1} {'type': 'loss', 'content': 0.15320546925067902, 'timestamp': '2025-10-01 04:13:48.438381', 'step': 1000, 'epoch': 1} {'type': 'info', 'content': 'Checkpoint saved at step 1000', 'timestamp': '2025-10-01 04:13:53.465133', 'step': 1000, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:13:53.495949', 'step': 1000, 'epoch': 1} {'type': 'loss', 'content': 0.17367176711559296, 'timestamp': '2025-10-01 04:13:53.497743', 'step': 1001, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:13:53.527973', 'step': 1001, 'epoch': 1} {'type': 'loss', 'content': 0.2275356650352478, 'timestamp': '2025-10-01 04:13:53.529751', 'step': 1002, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:53.560179', 'step': 1002, 'epoch': 1} {'type': 'loss', 'content': 0.29590946435928345, 'timestamp': '2025-10-01 04:13:53.562161', 'step': 1003, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:13:53.593678', 'step': 1003, 'epoch': 1} {'type': 'loss', 'content': 0.14950236678123474, 'timestamp': '2025-10-01 04:13:53.617234', 'step': 1004, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:53.648793', 'step': 1004, 'epoch': 1} {'type': 'loss', 'content': 0.19275811314582825, 'timestamp': '2025-10-01 04:13:53.650740', 'step': 1005, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:53.680154', 'step': 1005, 'epoch': 1} {'type': 'loss', 'content': 0.1609838604927063, 'timestamp': '2025-10-01 04:13:53.683216', 'step': 1006, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:13:53.713046', 'step': 1006, 'epoch': 1} {'type': 'loss', 'content': 0.2057788372039795, 'timestamp': '2025-10-01 04:13:53.715184', 'step': 1007, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:53.744917', 'step': 1007, 'epoch': 1} {'type': 'loss', 'content': 0.1501598209142685, 'timestamp': '2025-10-01 04:13:53.768532', 'step': 1008, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:53.807528', 'step': 1008, 'epoch': 1} {'type': 'loss', 'content': 0.23144450783729553, 'timestamp': '2025-10-01 04:13:53.810281', 'step': 1009, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:13:53.840016', 'step': 1009, 'epoch': 1} {'type': 'loss', 'content': 0.127912700176239, 'timestamp': '2025-10-01 04:13:53.842061', 'step': 1010, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:13:53.871618', 'step': 1010, 'epoch': 1} {'type': 'loss', 'content': 0.2402452677488327, 'timestamp': '2025-10-01 04:13:53.874008', 'step': 1011, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:53.903617', 'step': 1011, 'epoch': 1} {'type': 'loss', 'content': 0.20104116201400757, 'timestamp': '2025-10-01 04:13:53.927361', 'step': 1012, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:13:53.967308', 'step': 1012, 'epoch': 1} {'type': 'loss', 'content': 0.23337353765964508, 'timestamp': '2025-10-01 04:13:53.969096', 'step': 1013, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:54.008895', 'step': 1013, 'epoch': 1} {'type': 'loss', 'content': 0.19013738632202148, 'timestamp': '2025-10-01 04:13:54.010899', 'step': 1014, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:54.040624', 'step': 1014, 'epoch': 1} {'type': 'loss', 'content': 0.12068413943052292, 'timestamp': '2025-10-01 04:13:54.042575', 'step': 1015, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:13:54.074332', 'step': 1015, 'epoch': 1} {'type': 'loss', 'content': 0.3017818033695221, 'timestamp': '2025-10-01 04:13:54.097924', 'step': 1016, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:13:54.128454', 'step': 1016, 'epoch': 1} {'type': 'loss', 'content': 0.14731164276599884, 'timestamp': '2025-10-01 04:13:54.130096', 'step': 1017, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:54.162747', 'step': 1017, 'epoch': 1} {'type': 'loss', 'content': 0.12589973211288452, 'timestamp': '2025-10-01 04:13:54.165338', 'step': 1018, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:54.195270', 'step': 1018, 'epoch': 1} {'type': 'loss', 'content': 0.14710628986358643, 'timestamp': '2025-10-01 04:13:54.197253', 'step': 1019, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:13:54.227977', 'step': 1019, 'epoch': 1} {'type': 'loss', 'content': 0.21340210735797882, 'timestamp': '2025-10-01 04:13:54.251436', 'step': 1020, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:13:54.281685', 'step': 1020, 'epoch': 1} {'type': 'loss', 'content': 0.16913612186908722, 'timestamp': '2025-10-01 04:13:54.284265', 'step': 1021, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:54.315241', 'step': 1021, 'epoch': 1} {'type': 'loss', 'content': 0.2826853096485138, 'timestamp': '2025-10-01 04:13:54.317038', 'step': 1022, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:13:54.346623', 'step': 1022, 'epoch': 1} {'type': 'loss', 'content': 0.18533103168010712, 'timestamp': '2025-10-01 04:13:54.348448', 'step': 1023, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:54.378329', 'step': 1023, 'epoch': 1} {'type': 'loss', 'content': 0.1578351855278015, 'timestamp': '2025-10-01 04:13:54.401970', 'step': 1024, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:54.431185', 'step': 1024, 'epoch': 1} {'type': 'loss', 'content': 0.1325499266386032, 'timestamp': '2025-10-01 04:13:54.433161', 'step': 1025, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:54.464039', 'step': 1025, 'epoch': 1} {'type': 'loss', 'content': 0.1988786906003952, 'timestamp': '2025-10-01 04:13:54.465791', 'step': 1026, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:54.496129', 'step': 1026, 'epoch': 1} {'type': 'loss', 'content': 0.14922572672367096, 'timestamp': '2025-10-01 04:13:54.498019', 'step': 1027, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:54.529419', 'step': 1027, 'epoch': 1} {'type': 'loss', 'content': 0.267701119184494, 'timestamp': '2025-10-01 04:13:54.553587', 'step': 1028, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:54.583238', 'step': 1028, 'epoch': 1} {'type': 'loss', 'content': 0.2590726315975189, 'timestamp': '2025-10-01 04:13:54.585347', 'step': 1029, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:13:54.615440', 'step': 1029, 'epoch': 1} {'type': 'loss', 'content': 0.14726270735263824, 'timestamp': '2025-10-01 04:13:54.618080', 'step': 1030, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:54.647992', 'step': 1030, 'epoch': 1} {'type': 'loss', 'content': 0.2938997149467468, 'timestamp': '2025-10-01 04:13:54.649909', 'step': 1031, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:54.679854', 'step': 1031, 'epoch': 1} {'type': 'loss', 'content': 0.18657025694847107, 'timestamp': '2025-10-01 04:13:54.704283', 'step': 1032, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:13:54.736995', 'step': 1032, 'epoch': 1} {'type': 'loss', 'content': 0.27284950017929077, 'timestamp': '2025-10-01 04:13:54.738797', 'step': 1033, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:54.768588', 'step': 1033, 'epoch': 1} {'type': 'loss', 'content': 0.14654415845870972, 'timestamp': '2025-10-01 04:13:54.770315', 'step': 1034, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:13:54.799974', 'step': 1034, 'epoch': 1} {'type': 'loss', 'content': 0.2845858037471771, 'timestamp': '2025-10-01 04:13:54.801569', 'step': 1035, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:54.832604', 'step': 1035, 'epoch': 1} {'type': 'loss', 'content': 0.2545631229877472, 'timestamp': '2025-10-01 04:13:54.855949', 'step': 1036, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:54.884911', 'step': 1036, 'epoch': 1} {'type': 'loss', 'content': 0.21617640554904938, 'timestamp': '2025-10-01 04:13:54.886855', 'step': 1037, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:13:54.917330', 'step': 1037, 'epoch': 1} {'type': 'loss', 'content': 0.15990248322486877, 'timestamp': '2025-10-01 04:13:54.919327', 'step': 1038, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:54.949290', 'step': 1038, 'epoch': 1} {'type': 'loss', 'content': 0.12361235171556473, 'timestamp': '2025-10-01 04:13:54.951424', 'step': 1039, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:13:54.981112', 'step': 1039, 'epoch': 1} {'type': 'loss', 'content': 0.2161618322134018, 'timestamp': '2025-10-01 04:13:55.006270', 'step': 1040, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:55.036853', 'step': 1040, 'epoch': 1} {'type': 'loss', 'content': 0.24640117585659027, 'timestamp': '2025-10-01 04:13:55.038401', 'step': 1041, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:55.068091', 'step': 1041, 'epoch': 1} {'type': 'loss', 'content': 0.23218823969364166, 'timestamp': '2025-10-01 04:13:55.070549', 'step': 1042, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:13:55.101369', 'step': 1042, 'epoch': 1} {'type': 'loss', 'content': 0.15357226133346558, 'timestamp': '2025-10-01 04:13:55.103654', 'step': 1043, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:55.133449', 'step': 1043, 'epoch': 1} {'type': 'loss', 'content': 0.22302620112895966, 'timestamp': '2025-10-01 04:13:55.157117', 'step': 1044, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:13:55.187547', 'step': 1044, 'epoch': 1} {'type': 'loss', 'content': 0.15919019281864166, 'timestamp': '2025-10-01 04:13:55.189322', 'step': 1045, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:55.226633', 'step': 1045, 'epoch': 1} {'type': 'loss', 'content': 0.10736053436994553, 'timestamp': '2025-10-01 04:13:55.236452', 'step': 1046, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:55.269549', 'step': 1046, 'epoch': 1} {'type': 'loss', 'content': 0.21194183826446533, 'timestamp': '2025-10-01 04:13:55.271112', 'step': 1047, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:13:55.301427', 'step': 1047, 'epoch': 1} {'type': 'loss', 'content': 0.23182113468647003, 'timestamp': '2025-10-01 04:13:55.325063', 'step': 1048, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:55.359851', 'step': 1048, 'epoch': 1} {'type': 'loss', 'content': 0.24333254992961884, 'timestamp': '2025-10-01 04:13:55.362865', 'step': 1049, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:13:55.392212', 'step': 1049, 'epoch': 1} {'type': 'loss', 'content': 0.17677588760852814, 'timestamp': '2025-10-01 04:13:55.394354', 'step': 1050, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:55.423754', 'step': 1050, 'epoch': 1} {'type': 'loss', 'content': 0.14351493120193481, 'timestamp': '2025-10-01 04:13:55.425538', 'step': 1051, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:13:55.455122', 'step': 1051, 'epoch': 1} {'type': 'loss', 'content': 0.15164220333099365, 'timestamp': '2025-10-01 04:13:55.478643', 'step': 1052, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:55.508937', 'step': 1052, 'epoch': 1} {'type': 'loss', 'content': 0.17439496517181396, 'timestamp': '2025-10-01 04:13:55.510807', 'step': 1053, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:13:55.540397', 'step': 1053, 'epoch': 1} {'type': 'loss', 'content': 0.15475241839885712, 'timestamp': '2025-10-01 04:13:55.542181', 'step': 1054, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:55.575587', 'step': 1054, 'epoch': 1} {'type': 'loss', 'content': 0.13579979538917542, 'timestamp': '2025-10-01 04:13:55.577764', 'step': 1055, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:13:55.608153', 'step': 1055, 'epoch': 1} {'type': 'loss', 'content': 0.24551436305046082, 'timestamp': '2025-10-01 04:13:55.633358', 'step': 1056, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:55.663553', 'step': 1056, 'epoch': 1} {'type': 'loss', 'content': 0.26992252469062805, 'timestamp': '2025-10-01 04:13:55.665318', 'step': 1057, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:55.694155', 'step': 1057, 'epoch': 1} {'type': 'loss', 'content': 0.17636945843696594, 'timestamp': '2025-10-01 04:13:55.696710', 'step': 1058, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:55.726797', 'step': 1058, 'epoch': 1} {'type': 'loss', 'content': 0.20004600286483765, 'timestamp': '2025-10-01 04:13:55.728903', 'step': 1059, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:13:55.758519', 'step': 1059, 'epoch': 1} {'type': 'loss', 'content': 0.2600114643573761, 'timestamp': '2025-10-01 04:13:55.781613', 'step': 1060, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:55.810926', 'step': 1060, 'epoch': 1} {'type': 'loss', 'content': 0.3176555931568146, 'timestamp': '2025-10-01 04:13:55.812515', 'step': 1061, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:55.840816', 'step': 1061, 'epoch': 1} {'type': 'loss', 'content': 0.14151717722415924, 'timestamp': '2025-10-01 04:13:55.842353', 'step': 1062, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:55.871690', 'step': 1062, 'epoch': 1} {'type': 'loss', 'content': 0.16114991903305054, 'timestamp': '2025-10-01 04:13:55.873759', 'step': 1063, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:55.903461', 'step': 1063, 'epoch': 1} {'type': 'loss', 'content': 0.17639942467212677, 'timestamp': '2025-10-01 04:13:55.928381', 'step': 1064, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:55.957729', 'step': 1064, 'epoch': 1} {'type': 'loss', 'content': 0.18127310276031494, 'timestamp': '2025-10-01 04:13:55.960067', 'step': 1065, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:55.989849', 'step': 1065, 'epoch': 1} {'type': 'loss', 'content': 0.21017032861709595, 'timestamp': '2025-10-01 04:13:55.994379', 'step': 1066, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:13:56.024832', 'step': 1066, 'epoch': 1} {'type': 'loss', 'content': 0.2096058428287506, 'timestamp': '2025-10-01 04:13:56.027267', 'step': 1067, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:56.057050', 'step': 1067, 'epoch': 1} {'type': 'loss', 'content': 0.18136659264564514, 'timestamp': '2025-10-01 04:13:56.080565', 'step': 1068, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:56.115103', 'step': 1068, 'epoch': 1} {'type': 'loss', 'content': 0.14962586760520935, 'timestamp': '2025-10-01 04:13:56.116985', 'step': 1069, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:56.147061', 'step': 1069, 'epoch': 1} {'type': 'loss', 'content': 0.18173755705356598, 'timestamp': '2025-10-01 04:13:56.149111', 'step': 1070, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:56.178879', 'step': 1070, 'epoch': 1} {'type': 'loss', 'content': 0.22364528477191925, 'timestamp': '2025-10-01 04:13:56.181033', 'step': 1071, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:56.211607', 'step': 1071, 'epoch': 1} {'type': 'loss', 'content': 0.20378455519676208, 'timestamp': '2025-10-01 04:13:56.235455', 'step': 1072, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:56.267255', 'step': 1072, 'epoch': 1} {'type': 'loss', 'content': 0.17777185142040253, 'timestamp': '2025-10-01 04:13:56.269121', 'step': 1073, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:56.298131', 'step': 1073, 'epoch': 1} {'type': 'loss', 'content': 0.18534229695796967, 'timestamp': '2025-10-01 04:13:56.300250', 'step': 1074, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:13:56.329845', 'step': 1074, 'epoch': 1} {'type': 'loss', 'content': 0.1746928095817566, 'timestamp': '2025-10-01 04:13:56.332379', 'step': 1075, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:56.369692', 'step': 1075, 'epoch': 1} {'type': 'loss', 'content': 0.12846796214580536, 'timestamp': '2025-10-01 04:13:56.392808', 'step': 1076, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:56.423283', 'step': 1076, 'epoch': 1} {'type': 'loss', 'content': 0.2587526738643646, 'timestamp': '2025-10-01 04:13:56.425133', 'step': 1077, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:56.462222', 'step': 1077, 'epoch': 1} {'type': 'loss', 'content': 0.18198548257350922, 'timestamp': '2025-10-01 04:13:56.464146', 'step': 1078, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:13:56.494843', 'step': 1078, 'epoch': 1} {'type': 'loss', 'content': 0.10936079919338226, 'timestamp': '2025-10-01 04:13:56.496824', 'step': 1079, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:13:56.526585', 'step': 1079, 'epoch': 1} {'type': 'loss', 'content': 0.25788307189941406, 'timestamp': '2025-10-01 04:13:56.549835', 'step': 1080, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:13:56.580024', 'step': 1080, 'epoch': 1} {'type': 'loss', 'content': 0.13768447935581207, 'timestamp': '2025-10-01 04:13:56.582700', 'step': 1081, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:13:56.613057', 'step': 1081, 'epoch': 1} {'type': 'loss', 'content': 0.14298605918884277, 'timestamp': '2025-10-01 04:13:56.614925', 'step': 1082, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:56.652956', 'step': 1082, 'epoch': 1} {'type': 'loss', 'content': 0.265857458114624, 'timestamp': '2025-10-01 04:13:56.655046', 'step': 1083, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:13:56.704270', 'step': 1083, 'epoch': 1} {'type': 'loss', 'content': 0.08966318517923355, 'timestamp': '2025-10-01 04:13:56.727826', 'step': 1084, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:56.773129', 'step': 1084, 'epoch': 1} {'type': 'loss', 'content': 0.21720825135707855, 'timestamp': '2025-10-01 04:13:56.775291', 'step': 1085, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:13:56.834067', 'step': 1085, 'epoch': 1} {'type': 'loss', 'content': 0.18460924923419952, 'timestamp': '2025-10-01 04:13:56.836321', 'step': 1086, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:13:56.880067', 'step': 1086, 'epoch': 1} {'type': 'loss', 'content': 0.18296757340431213, 'timestamp': '2025-10-01 04:13:56.882016', 'step': 1087, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:56.926068', 'step': 1087, 'epoch': 1} {'type': 'loss', 'content': 0.24703730642795563, 'timestamp': '2025-10-01 04:13:56.949526', 'step': 1088, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:13:56.994800', 'step': 1088, 'epoch': 1} {'type': 'loss', 'content': 0.12238054722547531, 'timestamp': '2025-10-01 04:13:56.996892', 'step': 1089, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:57.040866', 'step': 1089, 'epoch': 1} {'type': 'loss', 'content': 0.1482236385345459, 'timestamp': '2025-10-01 04:13:57.043187', 'step': 1090, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:57.100626', 'step': 1090, 'epoch': 1} {'type': 'loss', 'content': 0.1384553164243698, 'timestamp': '2025-10-01 04:13:57.102647', 'step': 1091, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:13:57.164561', 'step': 1091, 'epoch': 1} {'type': 'loss', 'content': 0.15318194031715393, 'timestamp': '2025-10-01 04:13:57.188495', 'step': 1092, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:57.226665', 'step': 1092, 'epoch': 1} {'type': 'loss', 'content': 0.1735294759273529, 'timestamp': '2025-10-01 04:13:57.228407', 'step': 1093, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:57.265308', 'step': 1093, 'epoch': 1} {'type': 'loss', 'content': 0.15244492888450623, 'timestamp': '2025-10-01 04:13:57.267017', 'step': 1094, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:57.307688', 'step': 1094, 'epoch': 1} {'type': 'loss', 'content': 0.20391039550304413, 'timestamp': '2025-10-01 04:13:57.310848', 'step': 1095, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:13:57.349894', 'step': 1095, 'epoch': 1} {'type': 'loss', 'content': 0.16148769855499268, 'timestamp': '2025-10-01 04:13:57.380182', 'step': 1096, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:13:57.421146', 'step': 1096, 'epoch': 1} {'type': 'loss', 'content': 0.24708829820156097, 'timestamp': '2025-10-01 04:13:57.423365', 'step': 1097, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:13:57.461467', 'step': 1097, 'epoch': 1} {'type': 'loss', 'content': 0.21645641326904297, 'timestamp': '2025-10-01 04:13:57.463961', 'step': 1098, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:13:57.499912', 'step': 1098, 'epoch': 1} {'type': 'loss', 'content': 0.1681373417377472, 'timestamp': '2025-10-01 04:13:57.517226', 'step': 1099, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:57.554846', 'step': 1099, 'epoch': 1} {'type': 'loss', 'content': 0.1578422635793686, 'timestamp': '2025-10-01 04:13:57.578440', 'step': 1100, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:13:57.619641', 'step': 1100, 'epoch': 1} {'type': 'loss', 'content': 0.1235559806227684, 'timestamp': '2025-10-01 04:13:57.621540', 'step': 1101, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:13:57.658303', 'step': 1101, 'epoch': 1} {'type': 'loss', 'content': 0.14267756044864655, 'timestamp': '2025-10-01 04:13:57.660195', 'step': 1102, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:13:57.698243', 'step': 1102, 'epoch': 1} {'type': 'loss', 'content': 0.21401259303092957, 'timestamp': '2025-10-01 04:13:57.710636', 'step': 1103, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:57.752452', 'step': 1103, 'epoch': 1} {'type': 'loss', 'content': 0.14099560678005219, 'timestamp': '2025-10-01 04:13:57.777705', 'step': 1104, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:13:57.809040', 'step': 1104, 'epoch': 1} {'type': 'loss', 'content': 0.23501403629779816, 'timestamp': '2025-10-01 04:13:57.810926', 'step': 1105, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:13:57.848691', 'step': 1105, 'epoch': 1} {'type': 'loss', 'content': 0.1639287918806076, 'timestamp': '2025-10-01 04:13:57.850689', 'step': 1106, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:57.888009', 'step': 1106, 'epoch': 1} {'type': 'loss', 'content': 0.21956342458724976, 'timestamp': '2025-10-01 04:13:57.889945', 'step': 1107, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:13:57.927505', 'step': 1107, 'epoch': 1} {'type': 'loss', 'content': 0.23598125576972961, 'timestamp': '2025-10-01 04:13:57.953313', 'step': 1108, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:13:57.993286', 'step': 1108, 'epoch': 1} {'type': 'loss', 'content': 0.16551169753074646, 'timestamp': '2025-10-01 04:13:57.995863', 'step': 1109, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:58.027573', 'step': 1109, 'epoch': 1} {'type': 'loss', 'content': 0.1620999276638031, 'timestamp': '2025-10-01 04:13:58.029824', 'step': 1110, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:13:58.070315', 'step': 1110, 'epoch': 1} {'type': 'loss', 'content': 0.14536114037036896, 'timestamp': '2025-10-01 04:13:58.073233', 'step': 1111, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:13:58.109111', 'step': 1111, 'epoch': 1} {'type': 'loss', 'content': 0.2232109010219574, 'timestamp': '2025-10-01 04:13:58.134648', 'step': 1112, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:58.169798', 'step': 1112, 'epoch': 1} {'type': 'loss', 'content': 0.19137059152126312, 'timestamp': '2025-10-01 04:13:58.171541', 'step': 1113, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:58.204694', 'step': 1113, 'epoch': 1} {'type': 'loss', 'content': 0.16568979620933533, 'timestamp': '2025-10-01 04:13:58.206622', 'step': 1114, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:13:58.248689', 'step': 1114, 'epoch': 1} {'type': 'loss', 'content': 0.12475878745317459, 'timestamp': '2025-10-01 04:13:58.250766', 'step': 1115, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:13:58.285296', 'step': 1115, 'epoch': 1} {'type': 'loss', 'content': 0.1457376331090927, 'timestamp': '2025-10-01 04:13:58.309057', 'step': 1116, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:13:58.353949', 'step': 1116, 'epoch': 1} {'type': 'loss', 'content': 0.2002539187669754, 'timestamp': '2025-10-01 04:13:58.355976', 'step': 1117, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:58.392336', 'step': 1117, 'epoch': 1} {'type': 'loss', 'content': 0.20586948096752167, 'timestamp': '2025-10-01 04:13:58.394290', 'step': 1118, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:13:58.429290', 'step': 1118, 'epoch': 1} {'type': 'loss', 'content': 0.20102503895759583, 'timestamp': '2025-10-01 04:13:58.431187', 'step': 1119, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:58.466208', 'step': 1119, 'epoch': 1} {'type': 'loss', 'content': 0.15158969163894653, 'timestamp': '2025-10-01 04:13:58.489704', 'step': 1120, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:58.529856', 'step': 1120, 'epoch': 1} {'type': 'loss', 'content': 0.23464436829090118, 'timestamp': '2025-10-01 04:13:58.532669', 'step': 1121, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:13:58.564108', 'step': 1121, 'epoch': 1} {'type': 'loss', 'content': 0.20220503211021423, 'timestamp': '2025-10-01 04:13:58.566018', 'step': 1122, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:13:58.600959', 'step': 1122, 'epoch': 1} {'type': 'loss', 'content': 0.14514850080013275, 'timestamp': '2025-10-01 04:13:58.602988', 'step': 1123, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:58.635878', 'step': 1123, 'epoch': 1} {'type': 'loss', 'content': 0.2012258917093277, 'timestamp': '2025-10-01 04:13:58.660844', 'step': 1124, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:13:58.699689', 'step': 1124, 'epoch': 1} {'type': 'loss', 'content': 0.13932755589485168, 'timestamp': '2025-10-01 04:13:58.701788', 'step': 1125, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:58.738728', 'step': 1125, 'epoch': 1} {'type': 'loss', 'content': 0.1610400676727295, 'timestamp': '2025-10-01 04:13:58.740798', 'step': 1126, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:13:58.787253', 'step': 1126, 'epoch': 1} {'type': 'loss', 'content': 0.18364906311035156, 'timestamp': '2025-10-01 04:13:58.790260', 'step': 1127, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:58.828656', 'step': 1127, 'epoch': 1} {'type': 'loss', 'content': 0.22065682709217072, 'timestamp': '2025-10-01 04:13:58.851970', 'step': 1128, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:58.890943', 'step': 1128, 'epoch': 1} {'type': 'loss', 'content': 0.1131238043308258, 'timestamp': '2025-10-01 04:13:58.893102', 'step': 1129, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:13:58.928604', 'step': 1129, 'epoch': 1} {'type': 'loss', 'content': 0.21281655132770538, 'timestamp': '2025-10-01 04:13:58.930497', 'step': 1130, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:13:58.961541', 'step': 1130, 'epoch': 1} {'type': 'loss', 'content': 0.25834137201309204, 'timestamp': '2025-10-01 04:13:58.963573', 'step': 1131, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:59.000277', 'step': 1131, 'epoch': 1} {'type': 'loss', 'content': 0.1511668562889099, 'timestamp': '2025-10-01 04:13:59.024221', 'step': 1132, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:59.062869', 'step': 1132, 'epoch': 1} {'type': 'loss', 'content': 0.26565802097320557, 'timestamp': '2025-10-01 04:13:59.064615', 'step': 1133, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:59.095818', 'step': 1133, 'epoch': 1} {'type': 'loss', 'content': 0.1560102105140686, 'timestamp': '2025-10-01 04:13:59.097881', 'step': 1134, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:13:59.137677', 'step': 1134, 'epoch': 1} {'type': 'loss', 'content': 0.31943023204803467, 'timestamp': '2025-10-01 04:13:59.139739', 'step': 1135, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:59.173741', 'step': 1135, 'epoch': 1} {'type': 'loss', 'content': 0.27505627274513245, 'timestamp': '2025-10-01 04:13:59.197101', 'step': 1136, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:59.230823', 'step': 1136, 'epoch': 1} {'type': 'loss', 'content': 0.16207309067249298, 'timestamp': '2025-10-01 04:13:59.232790', 'step': 1137, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:59.269529', 'step': 1137, 'epoch': 1} {'type': 'loss', 'content': 0.18570269644260406, 'timestamp': '2025-10-01 04:13:59.271380', 'step': 1138, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:13:59.303235', 'step': 1138, 'epoch': 1} {'type': 'loss', 'content': 0.18192249536514282, 'timestamp': '2025-10-01 04:13:59.305139', 'step': 1139, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:13:59.344458', 'step': 1139, 'epoch': 1} {'type': 'loss', 'content': 0.09549612551927567, 'timestamp': '2025-10-01 04:13:59.368001', 'step': 1140, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:59.413743', 'step': 1140, 'epoch': 1} {'type': 'loss', 'content': 0.19020214676856995, 'timestamp': '2025-10-01 04:13:59.415771', 'step': 1141, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:13:59.456791', 'step': 1141, 'epoch': 1} {'type': 'loss', 'content': 0.22947975993156433, 'timestamp': '2025-10-01 04:13:59.458790', 'step': 1142, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:13:59.494789', 'step': 1142, 'epoch': 1} {'type': 'loss', 'content': 0.12752681970596313, 'timestamp': '2025-10-01 04:13:59.496734', 'step': 1143, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:13:59.537624', 'step': 1143, 'epoch': 1} {'type': 'loss', 'content': 0.21237286925315857, 'timestamp': '2025-10-01 04:13:59.561038', 'step': 1144, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:13:59.599788', 'step': 1144, 'epoch': 1} {'type': 'loss', 'content': 0.18452633917331696, 'timestamp': '2025-10-01 04:13:59.601846', 'step': 1145, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:13:59.643377', 'step': 1145, 'epoch': 1} {'type': 'loss', 'content': 0.12301010638475418, 'timestamp': '2025-10-01 04:13:59.645667', 'step': 1146, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:13:59.678232', 'step': 1146, 'epoch': 1} {'type': 'loss', 'content': 0.1728423833847046, 'timestamp': '2025-10-01 04:13:59.680337', 'step': 1147, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:59.711680', 'step': 1147, 'epoch': 1} {'type': 'loss', 'content': 0.2544845640659332, 'timestamp': '2025-10-01 04:13:59.735384', 'step': 1148, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:59.768694', 'step': 1148, 'epoch': 1} {'type': 'loss', 'content': 0.2588503956794739, 'timestamp': '2025-10-01 04:13:59.771119', 'step': 1149, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:13:59.814104', 'step': 1149, 'epoch': 1} {'type': 'loss', 'content': 0.21072016656398773, 'timestamp': '2025-10-01 04:13:59.816221', 'step': 1150, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:59.857478', 'step': 1150, 'epoch': 1} {'type': 'loss', 'content': 0.10711368918418884, 'timestamp': '2025-10-01 04:13:59.859711', 'step': 1151, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:13:59.891677', 'step': 1151, 'epoch': 1} {'type': 'loss', 'content': 0.16950321197509766, 'timestamp': '2025-10-01 04:13:59.915105', 'step': 1152, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:13:59.950858', 'step': 1152, 'epoch': 1} {'type': 'loss', 'content': 0.16150341928005219, 'timestamp': '2025-10-01 04:13:59.952896', 'step': 1153, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:13:59.989458', 'step': 1153, 'epoch': 1} {'type': 'loss', 'content': 0.14137525856494904, 'timestamp': '2025-10-01 04:13:59.991805', 'step': 1154, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:14:00.033899', 'step': 1154, 'epoch': 1} {'type': 'loss', 'content': 0.16798429191112518, 'timestamp': '2025-10-01 04:14:00.036033', 'step': 1155, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-10-01 04:14:00.073985', 'step': 1155, 'epoch': 1} {'type': 'loss', 'content': 0.22975744307041168, 'timestamp': '2025-10-01 04:14:00.102161', 'step': 1156, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:14:00.144420', 'step': 1156, 'epoch': 1} {'type': 'loss', 'content': 0.22121334075927734, 'timestamp': '2025-10-01 04:14:00.146907', 'step': 1157, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:00.184404', 'step': 1157, 'epoch': 1} {'type': 'loss', 'content': 0.20748111605644226, 'timestamp': '2025-10-01 04:14:00.186645', 'step': 1158, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:00.218911', 'step': 1158, 'epoch': 1} {'type': 'loss', 'content': 0.18510477244853973, 'timestamp': '2025-10-01 04:14:00.228955', 'step': 1159, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:00.266369', 'step': 1159, 'epoch': 1} {'type': 'loss', 'content': 0.1708037406206131, 'timestamp': '2025-10-01 04:14:00.289847', 'step': 1160, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:14:00.321460', 'step': 1160, 'epoch': 1} {'type': 'loss', 'content': 0.12342039495706558, 'timestamp': '2025-10-01 04:14:00.323390', 'step': 1161, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:14:00.362174', 'step': 1161, 'epoch': 1} {'type': 'loss', 'content': 0.25187623500823975, 'timestamp': '2025-10-01 04:14:00.364580', 'step': 1162, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:00.409226', 'step': 1162, 'epoch': 1} {'type': 'loss', 'content': 0.16479066014289856, 'timestamp': '2025-10-01 04:14:00.411221', 'step': 1163, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:14:00.455227', 'step': 1163, 'epoch': 1} {'type': 'loss', 'content': 0.12166864424943924, 'timestamp': '2025-10-01 04:14:00.479003', 'step': 1164, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:14:00.513959', 'step': 1164, 'epoch': 1} {'type': 'loss', 'content': 0.1427878737449646, 'timestamp': '2025-10-01 04:14:00.516121', 'step': 1165, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:00.554742', 'step': 1165, 'epoch': 1} {'type': 'loss', 'content': 0.14106687903404236, 'timestamp': '2025-10-01 04:14:00.556906', 'step': 1166, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:00.595291', 'step': 1166, 'epoch': 1} {'type': 'loss', 'content': 0.1721985638141632, 'timestamp': '2025-10-01 04:14:00.597246', 'step': 1167, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:14:00.640668', 'step': 1167, 'epoch': 1} {'type': 'loss', 'content': 0.22439107298851013, 'timestamp': '2025-10-01 04:14:00.664221', 'step': 1168, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:00.696390', 'step': 1168, 'epoch': 1} {'type': 'loss', 'content': 0.17057031393051147, 'timestamp': '2025-10-01 04:14:00.698652', 'step': 1169, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:00.733804', 'step': 1169, 'epoch': 1} {'type': 'loss', 'content': 0.19465148448944092, 'timestamp': '2025-10-01 04:14:00.736257', 'step': 1170, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:00.775171', 'step': 1170, 'epoch': 1} {'type': 'loss', 'content': 0.16185259819030762, 'timestamp': '2025-10-01 04:14:00.777393', 'step': 1171, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:00.811908', 'step': 1171, 'epoch': 1} {'type': 'loss', 'content': 0.18968868255615234, 'timestamp': '2025-10-01 04:14:00.835696', 'step': 1172, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:00.866913', 'step': 1172, 'epoch': 1} {'type': 'loss', 'content': 0.16331326961517334, 'timestamp': '2025-10-01 04:14:00.869420', 'step': 1173, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:14:00.900684', 'step': 1173, 'epoch': 1} {'type': 'loss', 'content': 0.16522236168384552, 'timestamp': '2025-10-01 04:14:00.903155', 'step': 1174, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:00.939036', 'step': 1174, 'epoch': 1} {'type': 'loss', 'content': 0.15102584660053253, 'timestamp': '2025-10-01 04:14:00.941360', 'step': 1175, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:14:00.972867', 'step': 1175, 'epoch': 1} {'type': 'loss', 'content': 0.21934561431407928, 'timestamp': '2025-10-01 04:14:01.000285', 'step': 1176, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:14:01.036523', 'step': 1176, 'epoch': 1} {'type': 'loss', 'content': 0.17196901142597198, 'timestamp': '2025-10-01 04:14:01.038861', 'step': 1177, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:14:01.075964', 'step': 1177, 'epoch': 1} {'type': 'loss', 'content': 0.2585332691669464, 'timestamp': '2025-10-01 04:14:01.078274', 'step': 1178, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:01.112837', 'step': 1178, 'epoch': 1} {'type': 'loss', 'content': 0.1370372176170349, 'timestamp': '2025-10-01 04:14:01.114755', 'step': 1179, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:01.146392', 'step': 1179, 'epoch': 1} {'type': 'loss', 'content': 0.20377548038959503, 'timestamp': '2025-10-01 04:14:01.169839', 'step': 1180, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:01.200904', 'step': 1180, 'epoch': 1} {'type': 'loss', 'content': 0.20105654001235962, 'timestamp': '2025-10-01 04:14:01.202876', 'step': 1181, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:01.236916', 'step': 1181, 'epoch': 1} {'type': 'loss', 'content': 0.1925230324268341, 'timestamp': '2025-10-01 04:14:01.238999', 'step': 1182, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:01.274226', 'step': 1182, 'epoch': 1} {'type': 'loss', 'content': 0.18039348721504211, 'timestamp': '2025-10-01 04:14:01.279236', 'step': 1183, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:14:01.321477', 'step': 1183, 'epoch': 1} {'type': 'loss', 'content': 0.12462974339723587, 'timestamp': '2025-10-01 04:14:01.344956', 'step': 1184, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:14:01.382088', 'step': 1184, 'epoch': 1} {'type': 'loss', 'content': 0.1134258508682251, 'timestamp': '2025-10-01 04:14:01.384120', 'step': 1185, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:01.415689', 'step': 1185, 'epoch': 1} {'type': 'loss', 'content': 0.15674468874931335, 'timestamp': '2025-10-01 04:14:01.417646', 'step': 1186, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:14:01.454844', 'step': 1186, 'epoch': 1} {'type': 'loss', 'content': 0.2577234208583832, 'timestamp': '2025-10-01 04:14:01.456722', 'step': 1187, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:01.495956', 'step': 1187, 'epoch': 1} {'type': 'loss', 'content': 0.2037203013896942, 'timestamp': '2025-10-01 04:14:01.519420', 'step': 1188, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:01.550417', 'step': 1188, 'epoch': 1} {'type': 'loss', 'content': 0.12556137144565582, 'timestamp': '2025-10-01 04:14:01.553364', 'step': 1189, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:14:01.588251', 'step': 1189, 'epoch': 1} {'type': 'loss', 'content': 0.15747907757759094, 'timestamp': '2025-10-01 04:14:01.590243', 'step': 1190, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:14:01.621644', 'step': 1190, 'epoch': 1} {'type': 'loss', 'content': 0.19467715919017792, 'timestamp': '2025-10-01 04:14:01.623500', 'step': 1191, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:01.653532', 'step': 1191, 'epoch': 1} {'type': 'loss', 'content': 0.09864528477191925, 'timestamp': '2025-10-01 04:14:01.676836', 'step': 1192, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:01.708623', 'step': 1192, 'epoch': 1} {'type': 'loss', 'content': 0.15655584633350372, 'timestamp': '2025-10-01 04:14:01.710493', 'step': 1193, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:01.742319', 'step': 1193, 'epoch': 1} {'type': 'loss', 'content': 0.23916083574295044, 'timestamp': '2025-10-01 04:14:01.744230', 'step': 1194, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:01.779261', 'step': 1194, 'epoch': 1} {'type': 'loss', 'content': 0.19118446111679077, 'timestamp': '2025-10-01 04:14:01.781255', 'step': 1195, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:01.812618', 'step': 1195, 'epoch': 1} {'type': 'loss', 'content': 0.1970042884349823, 'timestamp': '2025-10-01 04:14:01.836707', 'step': 1196, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:01.872897', 'step': 1196, 'epoch': 1} {'type': 'loss', 'content': 0.2449132353067398, 'timestamp': '2025-10-01 04:14:01.874817', 'step': 1197, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:01.906215', 'step': 1197, 'epoch': 1} {'type': 'loss', 'content': 0.14524930715560913, 'timestamp': '2025-10-01 04:14:01.908253', 'step': 1198, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:01.940374', 'step': 1198, 'epoch': 1} {'type': 'loss', 'content': 0.147426575422287, 'timestamp': '2025-10-01 04:14:01.942418', 'step': 1199, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:01.973836', 'step': 1199, 'epoch': 1} {'type': 'loss', 'content': 0.13226737082004547, 'timestamp': '2025-10-01 04:14:01.997549', 'step': 1200, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:02.036429', 'step': 1200, 'epoch': 1} {'type': 'loss', 'content': 0.14601679146289825, 'timestamp': '2025-10-01 04:14:02.038209', 'step': 1201, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:14:02.071221', 'step': 1201, 'epoch': 1} {'type': 'loss', 'content': 0.18382325768470764, 'timestamp': '2025-10-01 04:14:02.073105', 'step': 1202, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:14:02.106239', 'step': 1202, 'epoch': 1} {'type': 'loss', 'content': 0.19673997163772583, 'timestamp': '2025-10-01 04:14:02.108243', 'step': 1203, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:02.144747', 'step': 1203, 'epoch': 1} {'type': 'loss', 'content': 0.19842775166034698, 'timestamp': '2025-10-01 04:14:02.168233', 'step': 1204, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:02.204708', 'step': 1204, 'epoch': 1} {'type': 'loss', 'content': 0.1521584838628769, 'timestamp': '2025-10-01 04:14:02.206679', 'step': 1205, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:14:02.247683', 'step': 1205, 'epoch': 1} {'type': 'loss', 'content': 0.24706591665744781, 'timestamp': '2025-10-01 04:14:02.249467', 'step': 1206, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:02.282431', 'step': 1206, 'epoch': 1} {'type': 'loss', 'content': 0.1749776154756546, 'timestamp': '2025-10-01 04:14:02.284362', 'step': 1207, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:02.321331', 'step': 1207, 'epoch': 1} {'type': 'loss', 'content': 0.14691929519176483, 'timestamp': '2025-10-01 04:14:02.344623', 'step': 1208, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:14:02.382797', 'step': 1208, 'epoch': 1} {'type': 'loss', 'content': 0.10797692835330963, 'timestamp': '2025-10-01 04:14:02.384842', 'step': 1209, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:14:02.417385', 'step': 1209, 'epoch': 1} {'type': 'loss', 'content': 0.22843222320079803, 'timestamp': '2025-10-01 04:14:02.419742', 'step': 1210, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:02.451677', 'step': 1210, 'epoch': 1} {'type': 'loss', 'content': 0.15664026141166687, 'timestamp': '2025-10-01 04:14:02.453583', 'step': 1211, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-10-01 04:14:02.497039', 'step': 1211, 'epoch': 1} {'type': 'loss', 'content': 0.19427061080932617, 'timestamp': '2025-10-01 04:14:02.525014', 'step': 1212, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:14:02.564984', 'step': 1212, 'epoch': 1} {'type': 'loss', 'content': 0.13761228322982788, 'timestamp': '2025-10-01 04:14:02.567034', 'step': 1213, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:02.601136', 'step': 1213, 'epoch': 1} {'type': 'loss', 'content': 0.14785030484199524, 'timestamp': '2025-10-01 04:14:02.603030', 'step': 1214, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:02.633870', 'step': 1214, 'epoch': 1} {'type': 'loss', 'content': 0.14465931057929993, 'timestamp': '2025-10-01 04:14:02.644471', 'step': 1215, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:02.685495', 'step': 1215, 'epoch': 1} {'type': 'loss', 'content': 0.14367932081222534, 'timestamp': '2025-10-01 04:14:02.708893', 'step': 1216, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:02.738974', 'step': 1216, 'epoch': 1} {'type': 'loss', 'content': 0.16902947425842285, 'timestamp': '2025-10-01 04:14:02.740963', 'step': 1217, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:02.775806', 'step': 1217, 'epoch': 1} {'type': 'loss', 'content': 0.15678004920482635, 'timestamp': '2025-10-01 04:14:02.778054', 'step': 1218, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:14:02.809707', 'step': 1218, 'epoch': 1} {'type': 'loss', 'content': 0.19661307334899902, 'timestamp': '2025-10-01 04:14:02.811673', 'step': 1219, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:14:02.848543', 'step': 1219, 'epoch': 1} {'type': 'loss', 'content': 0.13258209824562073, 'timestamp': '2025-10-01 04:14:02.872307', 'step': 1220, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:02.906385', 'step': 1220, 'epoch': 1} {'type': 'loss', 'content': 0.18188287317752838, 'timestamp': '2025-10-01 04:14:02.909187', 'step': 1221, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:02.940758', 'step': 1221, 'epoch': 1} {'type': 'loss', 'content': 0.12792739272117615, 'timestamp': '2025-10-01 04:14:02.944012', 'step': 1222, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:02.976686', 'step': 1222, 'epoch': 1} {'type': 'loss', 'content': 0.2021760791540146, 'timestamp': '2025-10-01 04:14:02.978761', 'step': 1223, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:14:03.012510', 'step': 1223, 'epoch': 1} {'type': 'loss', 'content': 0.12622644007205963, 'timestamp': '2025-10-01 04:14:03.036304', 'step': 1224, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:03.073723', 'step': 1224, 'epoch': 1} {'type': 'loss', 'content': 0.2547675371170044, 'timestamp': '2025-10-01 04:14:03.075731', 'step': 1225, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:03.111015', 'step': 1225, 'epoch': 1} {'type': 'loss', 'content': 0.2808956503868103, 'timestamp': '2025-10-01 04:14:03.112913', 'step': 1226, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:03.149331', 'step': 1226, 'epoch': 1} {'type': 'loss', 'content': 0.23004654049873352, 'timestamp': '2025-10-01 04:14:03.151502', 'step': 1227, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:14:03.184049', 'step': 1227, 'epoch': 1} {'type': 'loss', 'content': 0.13192282617092133, 'timestamp': '2025-10-01 04:14:03.207828', 'step': 1228, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:03.240644', 'step': 1228, 'epoch': 1} {'type': 'loss', 'content': 0.21617205440998077, 'timestamp': '2025-10-01 04:14:03.242549', 'step': 1229, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:03.276511', 'step': 1229, 'epoch': 1} {'type': 'loss', 'content': 0.28671491146087646, 'timestamp': '2025-10-01 04:14:03.278473', 'step': 1230, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:14:03.309370', 'step': 1230, 'epoch': 1} {'type': 'loss', 'content': 0.21424822509288788, 'timestamp': '2025-10-01 04:14:03.311761', 'step': 1231, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:14:03.354287', 'step': 1231, 'epoch': 1} {'type': 'loss', 'content': 0.20343461632728577, 'timestamp': '2025-10-01 04:14:03.377739', 'step': 1232, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:03.413745', 'step': 1232, 'epoch': 1} {'type': 'loss', 'content': 0.18809902667999268, 'timestamp': '2025-10-01 04:14:03.415650', 'step': 1233, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:14:03.446949', 'step': 1233, 'epoch': 1} {'type': 'loss', 'content': 0.15241387486457825, 'timestamp': '2025-10-01 04:14:03.448849', 'step': 1234, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:03.493018', 'step': 1234, 'epoch': 1} {'type': 'loss', 'content': 0.135323628783226, 'timestamp': '2025-10-01 04:14:03.494948', 'step': 1235, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:03.534508', 'step': 1235, 'epoch': 1} {'type': 'loss', 'content': 0.12697742879390717, 'timestamp': '2025-10-01 04:14:03.557938', 'step': 1236, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:14:03.589214', 'step': 1236, 'epoch': 1} {'type': 'loss', 'content': 0.2140718400478363, 'timestamp': '2025-10-01 04:14:03.591089', 'step': 1237, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-10-01 04:14:03.643276', 'step': 1237, 'epoch': 1} {'type': 'loss', 'content': 0.4302031397819519, 'timestamp': '2025-10-01 04:14:03.649441', 'step': 1238, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:03.698475', 'step': 1238, 'epoch': 1} {'type': 'loss', 'content': 0.14815053343772888, 'timestamp': '2025-10-01 04:14:03.703881', 'step': 1239, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:03.734469', 'step': 1239, 'epoch': 1} {'type': 'loss', 'content': 0.2483849972486496, 'timestamp': '2025-10-01 04:14:03.757578', 'step': 1240, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:03.793083', 'step': 1240, 'epoch': 1} {'type': 'loss', 'content': 0.30370762944221497, 'timestamp': '2025-10-01 04:14:03.795164', 'step': 1241, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:03.826976', 'step': 1241, 'epoch': 1} {'type': 'loss', 'content': 0.16819454729557037, 'timestamp': '2025-10-01 04:14:03.829099', 'step': 1242, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:03.867985', 'step': 1242, 'epoch': 1} {'type': 'loss', 'content': 0.17077206075191498, 'timestamp': '2025-10-01 04:14:03.878921', 'step': 1243, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:03.912649', 'step': 1243, 'epoch': 1} {'type': 'loss', 'content': 0.108652763068676, 'timestamp': '2025-10-01 04:14:03.936176', 'step': 1244, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:03.969582', 'step': 1244, 'epoch': 1} {'type': 'loss', 'content': 0.23065786063671112, 'timestamp': '2025-10-01 04:14:03.971482', 'step': 1245, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:14:04.006806', 'step': 1245, 'epoch': 1} {'type': 'loss', 'content': 0.15656208992004395, 'timestamp': '2025-10-01 04:14:04.009381', 'step': 1246, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:04.046830', 'step': 1246, 'epoch': 1} {'type': 'loss', 'content': 0.2536858320236206, 'timestamp': '2025-10-01 04:14:04.048813', 'step': 1247, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:04.084618', 'step': 1247, 'epoch': 1} {'type': 'loss', 'content': 0.09957437217235565, 'timestamp': '2025-10-01 04:14:04.108213', 'step': 1248, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:04.139678', 'step': 1248, 'epoch': 1} {'type': 'loss', 'content': 0.17612172663211823, 'timestamp': '2025-10-01 04:14:04.141643', 'step': 1249, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:04.175534', 'step': 1249, 'epoch': 1} {'type': 'loss', 'content': 0.14010071754455566, 'timestamp': '2025-10-01 04:14:04.177717', 'step': 1250, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:14:04.213452', 'step': 1250, 'epoch': 1} {'type': 'loss', 'content': 0.17902478575706482, 'timestamp': '2025-10-01 04:14:04.215345', 'step': 1251, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:14:04.259323', 'step': 1251, 'epoch': 1} {'type': 'loss', 'content': 0.1486227661371231, 'timestamp': '2025-10-01 04:14:04.283611', 'step': 1252, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:04.315298', 'step': 1252, 'epoch': 1} {'type': 'loss', 'content': 0.14100901782512665, 'timestamp': '2025-10-01 04:14:04.317152', 'step': 1253, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:04.362166', 'step': 1253, 'epoch': 1} {'type': 'loss', 'content': 0.14230921864509583, 'timestamp': '2025-10-01 04:14:04.364119', 'step': 1254, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:04.397224', 'step': 1254, 'epoch': 1} {'type': 'loss', 'content': 0.15728655457496643, 'timestamp': '2025-10-01 04:14:04.399140', 'step': 1255, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:14:04.429341', 'step': 1255, 'epoch': 1} {'type': 'loss', 'content': 0.18610593676567078, 'timestamp': '2025-10-01 04:14:04.452892', 'step': 1256, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:04.488362', 'step': 1256, 'epoch': 1} {'type': 'loss', 'content': 0.1692594438791275, 'timestamp': '2025-10-01 04:14:04.490649', 'step': 1257, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:04.527580', 'step': 1257, 'epoch': 1} {'type': 'loss', 'content': 0.22295206785202026, 'timestamp': '2025-10-01 04:14:04.529266', 'step': 1258, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:04.565372', 'step': 1258, 'epoch': 1} {'type': 'loss', 'content': 0.1852664351463318, 'timestamp': '2025-10-01 04:14:04.567757', 'step': 1259, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:04.622342', 'step': 1259, 'epoch': 1} {'type': 'loss', 'content': 0.26325011253356934, 'timestamp': '2025-10-01 04:14:04.645820', 'step': 1260, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:14:04.677202', 'step': 1260, 'epoch': 1} {'type': 'loss', 'content': 0.20734664797782898, 'timestamp': '2025-10-01 04:14:04.679395', 'step': 1261, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:14:04.712663', 'step': 1261, 'epoch': 1} {'type': 'loss', 'content': 0.24802350997924805, 'timestamp': '2025-10-01 04:14:04.714416', 'step': 1262, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:04.746060', 'step': 1262, 'epoch': 1} {'type': 'loss', 'content': 0.2029704451560974, 'timestamp': '2025-10-01 04:14:04.747902', 'step': 1263, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:04.780514', 'step': 1263, 'epoch': 1} {'type': 'loss', 'content': 0.14316943287849426, 'timestamp': '2025-10-01 04:14:04.803860', 'step': 1264, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:04.844951', 'step': 1264, 'epoch': 1} {'type': 'loss', 'content': 0.28004705905914307, 'timestamp': '2025-10-01 04:14:04.846810', 'step': 1265, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:04.878834', 'step': 1265, 'epoch': 1} {'type': 'loss', 'content': 0.17839376628398895, 'timestamp': '2025-10-01 04:14:04.880885', 'step': 1266, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:04.912470', 'step': 1266, 'epoch': 1} {'type': 'loss', 'content': 0.2604229748249054, 'timestamp': '2025-10-01 04:14:04.914311', 'step': 1267, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:04.948224', 'step': 1267, 'epoch': 1} {'type': 'loss', 'content': 0.05971445515751839, 'timestamp': '2025-10-01 04:14:04.972001', 'step': 1268, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:05.004006', 'step': 1268, 'epoch': 1} {'type': 'loss', 'content': 0.14931997656822205, 'timestamp': '2025-10-01 04:14:05.006406', 'step': 1269, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:14:05.038030', 'step': 1269, 'epoch': 1} {'type': 'loss', 'content': 0.34150832891464233, 'timestamp': '2025-10-01 04:14:05.040147', 'step': 1270, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:05.073058', 'step': 1270, 'epoch': 1} {'type': 'loss', 'content': 0.2516546845436096, 'timestamp': '2025-10-01 04:14:05.075064', 'step': 1271, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:14:05.109695', 'step': 1271, 'epoch': 1} {'type': 'loss', 'content': 0.18507491052150726, 'timestamp': '2025-10-01 04:14:05.133238', 'step': 1272, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:05.175234', 'step': 1272, 'epoch': 1} {'type': 'loss', 'content': 0.1944434493780136, 'timestamp': '2025-10-01 04:14:05.177289', 'step': 1273, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:05.207886', 'step': 1273, 'epoch': 1} {'type': 'loss', 'content': 0.12553349137306213, 'timestamp': '2025-10-01 04:14:05.209806', 'step': 1274, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:05.245064', 'step': 1274, 'epoch': 1} {'type': 'loss', 'content': 0.10843156278133392, 'timestamp': '2025-10-01 04:14:05.247186', 'step': 1275, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:05.278587', 'step': 1275, 'epoch': 1} {'type': 'loss', 'content': 0.21208201348781586, 'timestamp': '2025-10-01 04:14:05.304612', 'step': 1276, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:05.335588', 'step': 1276, 'epoch': 1} {'type': 'loss', 'content': 0.270678848028183, 'timestamp': '2025-10-01 04:14:05.337237', 'step': 1277, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:05.376250', 'step': 1277, 'epoch': 1} {'type': 'loss', 'content': 0.1720942109823227, 'timestamp': '2025-10-01 04:14:05.378133', 'step': 1278, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:14:05.410744', 'step': 1278, 'epoch': 1} {'type': 'loss', 'content': 0.15711042284965515, 'timestamp': '2025-10-01 04:14:05.412663', 'step': 1279, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:05.458428', 'step': 1279, 'epoch': 1} {'type': 'loss', 'content': 0.20112532377243042, 'timestamp': '2025-10-01 04:14:05.481799', 'step': 1280, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:14:05.521654', 'step': 1280, 'epoch': 1} {'type': 'loss', 'content': 0.16448698937892914, 'timestamp': '2025-10-01 04:14:05.523600', 'step': 1281, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:05.555701', 'step': 1281, 'epoch': 1} {'type': 'loss', 'content': 0.13424277305603027, 'timestamp': '2025-10-01 04:14:05.560869', 'step': 1282, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:14:05.597272', 'step': 1282, 'epoch': 1} {'type': 'loss', 'content': 0.18567340075969696, 'timestamp': '2025-10-01 04:14:05.599183', 'step': 1283, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:05.630632', 'step': 1283, 'epoch': 1} {'type': 'loss', 'content': 0.15065766870975494, 'timestamp': '2025-10-01 04:14:05.654034', 'step': 1284, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:14:05.688519', 'step': 1284, 'epoch': 1} {'type': 'loss', 'content': 0.18214890360832214, 'timestamp': '2025-10-01 04:14:05.691123', 'step': 1285, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:05.735624', 'step': 1285, 'epoch': 1} {'type': 'loss', 'content': 0.22371110320091248, 'timestamp': '2025-10-01 04:14:05.737541', 'step': 1286, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:05.791199', 'step': 1286, 'epoch': 1} {'type': 'loss', 'content': 0.2663910388946533, 'timestamp': '2025-10-01 04:14:05.793540', 'step': 1287, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:05.837480', 'step': 1287, 'epoch': 1} {'type': 'loss', 'content': 0.13251277804374695, 'timestamp': '2025-10-01 04:14:05.860990', 'step': 1288, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:05.897227', 'step': 1288, 'epoch': 1} {'type': 'loss', 'content': 0.1474667340517044, 'timestamp': '2025-10-01 04:14:05.899150', 'step': 1289, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:14:05.935575', 'step': 1289, 'epoch': 1} {'type': 'loss', 'content': 0.17985443770885468, 'timestamp': '2025-10-01 04:14:05.940918', 'step': 1290, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:05.974379', 'step': 1290, 'epoch': 1} {'type': 'loss', 'content': 0.2125849574804306, 'timestamp': '2025-10-01 04:14:05.976313', 'step': 1291, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:06.022175', 'step': 1291, 'epoch': 1} {'type': 'loss', 'content': 0.17134550213813782, 'timestamp': '2025-10-01 04:14:06.045543', 'step': 1292, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:06.081060', 'step': 1292, 'epoch': 1} {'type': 'loss', 'content': 0.14768622815608978, 'timestamp': '2025-10-01 04:14:06.084950', 'step': 1293, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:06.138500', 'step': 1293, 'epoch': 1} {'type': 'loss', 'content': 0.1586385816335678, 'timestamp': '2025-10-01 04:14:06.140378', 'step': 1294, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:14:06.188140', 'step': 1294, 'epoch': 1} {'type': 'loss', 'content': 0.2189406156539917, 'timestamp': '2025-10-01 04:14:06.189998', 'step': 1295, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:06.228247', 'step': 1295, 'epoch': 1} {'type': 'loss', 'content': 0.22936278581619263, 'timestamp': '2025-10-01 04:14:06.251246', 'step': 1296, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:06.299439', 'step': 1296, 'epoch': 1} {'type': 'loss', 'content': 0.12707071006298065, 'timestamp': '2025-10-01 04:14:06.301865', 'step': 1297, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:06.349827', 'step': 1297, 'epoch': 1} {'type': 'loss', 'content': 0.19300729036331177, 'timestamp': '2025-10-01 04:14:06.351891', 'step': 1298, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:06.411515', 'step': 1298, 'epoch': 1} {'type': 'loss', 'content': 0.17705132067203522, 'timestamp': '2025-10-01 04:14:06.413517', 'step': 1299, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:14:06.447509', 'step': 1299, 'epoch': 1} {'type': 'loss', 'content': 0.15483003854751587, 'timestamp': '2025-10-01 04:14:06.472422', 'step': 1300, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:06.517757', 'step': 1300, 'epoch': 1} {'type': 'loss', 'content': 0.2426130175590515, 'timestamp': '2025-10-01 04:14:06.520021', 'step': 1301, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:06.587470', 'step': 1301, 'epoch': 1} {'type': 'loss', 'content': 0.19928203523159027, 'timestamp': '2025-10-01 04:14:06.591418', 'step': 1302, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:06.644797', 'step': 1302, 'epoch': 1} {'type': 'loss', 'content': 0.26495814323425293, 'timestamp': '2025-10-01 04:14:06.646875', 'step': 1303, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:06.691189', 'step': 1303, 'epoch': 1} {'type': 'loss', 'content': 0.1854230761528015, 'timestamp': '2025-10-01 04:14:06.715990', 'step': 1304, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:14:06.760587', 'step': 1304, 'epoch': 1} {'type': 'loss', 'content': 0.16574619710445404, 'timestamp': '2025-10-01 04:14:06.762761', 'step': 1305, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:14:06.807363', 'step': 1305, 'epoch': 1} {'type': 'loss', 'content': 0.33823204040527344, 'timestamp': '2025-10-01 04:14:06.809228', 'step': 1306, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:06.854076', 'step': 1306, 'epoch': 1} {'type': 'loss', 'content': 0.1871311068534851, 'timestamp': '2025-10-01 04:14:06.855861', 'step': 1307, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:14:06.909119', 'step': 1307, 'epoch': 1} {'type': 'loss', 'content': 0.2435322105884552, 'timestamp': '2025-10-01 04:14:06.932493', 'step': 1308, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:06.987705', 'step': 1308, 'epoch': 1} {'type': 'loss', 'content': 0.1436932384967804, 'timestamp': '2025-10-01 04:14:06.989362', 'step': 1309, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:14:07.031246', 'step': 1309, 'epoch': 1} {'type': 'loss', 'content': 0.09690041095018387, 'timestamp': '2025-10-01 04:14:07.033195', 'step': 1310, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:07.074830', 'step': 1310, 'epoch': 1} {'type': 'loss', 'content': 0.14588899910449982, 'timestamp': '2025-10-01 04:14:07.076736', 'step': 1311, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:14:07.128908', 'step': 1311, 'epoch': 1} {'type': 'loss', 'content': 0.15013499557971954, 'timestamp': '2025-10-01 04:14:07.152806', 'step': 1312, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:07.185095', 'step': 1312, 'epoch': 1} {'type': 'loss', 'content': 0.19924740493297577, 'timestamp': '2025-10-01 04:14:07.186907', 'step': 1313, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:14:07.229009', 'step': 1313, 'epoch': 1} {'type': 'loss', 'content': 0.16099165380001068, 'timestamp': '2025-10-01 04:14:07.231594', 'step': 1314, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:14:07.273223', 'step': 1314, 'epoch': 1} {'type': 'loss', 'content': 0.19286343455314636, 'timestamp': '2025-10-01 04:14:07.275828', 'step': 1315, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:07.319341', 'step': 1315, 'epoch': 1} {'type': 'loss', 'content': 0.22120893001556396, 'timestamp': '2025-10-01 04:14:07.342759', 'step': 1316, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:07.385954', 'step': 1316, 'epoch': 1} {'type': 'loss', 'content': 0.15959934890270233, 'timestamp': '2025-10-01 04:14:07.387902', 'step': 1317, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:07.430911', 'step': 1317, 'epoch': 1} {'type': 'loss', 'content': 0.13344216346740723, 'timestamp': '2025-10-01 04:14:07.432857', 'step': 1318, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:14:07.467470', 'step': 1318, 'epoch': 1} {'type': 'loss', 'content': 0.16937343776226044, 'timestamp': '2025-10-01 04:14:07.469928', 'step': 1319, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:07.512389', 'step': 1319, 'epoch': 1} {'type': 'loss', 'content': 0.07322656363248825, 'timestamp': '2025-10-01 04:14:07.535975', 'step': 1320, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:14:07.568878', 'step': 1320, 'epoch': 1} {'type': 'loss', 'content': 0.19311083853244781, 'timestamp': '2025-10-01 04:14:07.570857', 'step': 1321, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:14:07.606312', 'step': 1321, 'epoch': 1} {'type': 'loss', 'content': 0.1172681525349617, 'timestamp': '2025-10-01 04:14:07.610659', 'step': 1322, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:07.643185', 'step': 1322, 'epoch': 1} {'type': 'loss', 'content': 0.12328366935253143, 'timestamp': '2025-10-01 04:14:07.645165', 'step': 1323, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:14:07.679519', 'step': 1323, 'epoch': 1} {'type': 'loss', 'content': 0.139862060546875, 'timestamp': '2025-10-01 04:14:07.714405', 'step': 1324, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:14:07.757811', 'step': 1324, 'epoch': 1} {'type': 'loss', 'content': 0.27823466062545776, 'timestamp': '2025-10-01 04:14:07.759863', 'step': 1325, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:07.792183', 'step': 1325, 'epoch': 1} {'type': 'loss', 'content': 0.10487933456897736, 'timestamp': '2025-10-01 04:14:07.796205', 'step': 1326, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:07.837826', 'step': 1326, 'epoch': 1} {'type': 'loss', 'content': 0.164432093501091, 'timestamp': '2025-10-01 04:14:07.839916', 'step': 1327, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:07.874565', 'step': 1327, 'epoch': 1} {'type': 'loss', 'content': 0.14392715692520142, 'timestamp': '2025-10-01 04:14:07.897883', 'step': 1328, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:07.932231', 'step': 1328, 'epoch': 1} {'type': 'loss', 'content': 0.13906267285346985, 'timestamp': '2025-10-01 04:14:07.934080', 'step': 1329, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:14:07.977541', 'step': 1329, 'epoch': 1} {'type': 'loss', 'content': 0.1143917664885521, 'timestamp': '2025-10-01 04:14:07.979626', 'step': 1330, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:08.023503', 'step': 1330, 'epoch': 1} {'type': 'loss', 'content': 0.1403450220823288, 'timestamp': '2025-10-01 04:14:08.025390', 'step': 1331, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:14:08.070968', 'step': 1331, 'epoch': 1} {'type': 'loss', 'content': 0.2534213066101074, 'timestamp': '2025-10-01 04:14:08.094364', 'step': 1332, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:08.129052', 'step': 1332, 'epoch': 1} {'type': 'loss', 'content': 0.19857366383075714, 'timestamp': '2025-10-01 04:14:08.131916', 'step': 1333, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:14:08.165933', 'step': 1333, 'epoch': 1} {'type': 'loss', 'content': 0.2699199318885803, 'timestamp': '2025-10-01 04:14:08.167987', 'step': 1334, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:08.217876', 'step': 1334, 'epoch': 1} {'type': 'loss', 'content': 0.19128887355327606, 'timestamp': '2025-10-01 04:14:08.219910', 'step': 1335, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:08.261691', 'step': 1335, 'epoch': 1} {'type': 'loss', 'content': 0.16227343678474426, 'timestamp': '2025-10-01 04:14:08.286121', 'step': 1336, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:14:08.325931', 'step': 1336, 'epoch': 1} {'type': 'loss', 'content': 0.10673652589321136, 'timestamp': '2025-10-01 04:14:08.327915', 'step': 1337, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:08.362974', 'step': 1337, 'epoch': 1} {'type': 'loss', 'content': 0.10750139504671097, 'timestamp': '2025-10-01 04:14:08.365343', 'step': 1338, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:14:08.398116', 'step': 1338, 'epoch': 1} {'type': 'loss', 'content': 0.15232591331005096, 'timestamp': '2025-10-01 04:14:08.400778', 'step': 1339, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:08.448246', 'step': 1339, 'epoch': 1} {'type': 'loss', 'content': 0.24932055175304413, 'timestamp': '2025-10-01 04:14:08.471732', 'step': 1340, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:08.504979', 'step': 1340, 'epoch': 1} {'type': 'loss', 'content': 0.19792962074279785, 'timestamp': '2025-10-01 04:14:08.507045', 'step': 1341, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:08.547964', 'step': 1341, 'epoch': 1} {'type': 'loss', 'content': 0.2160211056470871, 'timestamp': '2025-10-01 04:14:08.549968', 'step': 1342, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:14:08.583594', 'step': 1342, 'epoch': 1} {'type': 'loss', 'content': 0.13359564542770386, 'timestamp': '2025-10-01 04:14:08.585752', 'step': 1343, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:08.618590', 'step': 1343, 'epoch': 1} {'type': 'loss', 'content': 0.12206931412220001, 'timestamp': '2025-10-01 04:14:08.642217', 'step': 1344, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:08.674376', 'step': 1344, 'epoch': 1} {'type': 'loss', 'content': 0.3226695656776428, 'timestamp': '2025-10-01 04:14:08.676404', 'step': 1345, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:08.712908', 'step': 1345, 'epoch': 1} {'type': 'loss', 'content': 0.1955525279045105, 'timestamp': '2025-10-01 04:14:08.714864', 'step': 1346, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:14:08.750117', 'step': 1346, 'epoch': 1} {'type': 'loss', 'content': 0.11490349471569061, 'timestamp': '2025-10-01 04:14:08.752237', 'step': 1347, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:08.794862', 'step': 1347, 'epoch': 1} {'type': 'loss', 'content': 0.2338830530643463, 'timestamp': '2025-10-01 04:14:08.818585', 'step': 1348, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:08.855566', 'step': 1348, 'epoch': 1} {'type': 'loss', 'content': 0.21479423344135284, 'timestamp': '2025-10-01 04:14:08.857642', 'step': 1349, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:14:08.900109', 'step': 1349, 'epoch': 1} {'type': 'loss', 'content': 0.19165848195552826, 'timestamp': '2025-10-01 04:14:08.902621', 'step': 1350, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:08.945568', 'step': 1350, 'epoch': 1} {'type': 'loss', 'content': 0.14249132573604584, 'timestamp': '2025-10-01 04:14:08.957928', 'step': 1351, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:14:08.992506', 'step': 1351, 'epoch': 1} {'type': 'loss', 'content': 0.160914346575737, 'timestamp': '2025-10-01 04:14:09.016355', 'step': 1352, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:09.050081', 'step': 1352, 'epoch': 1} {'type': 'loss', 'content': 0.2193601131439209, 'timestamp': '2025-10-01 04:14:09.052321', 'step': 1353, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:09.085112', 'step': 1353, 'epoch': 1} {'type': 'loss', 'content': 0.1507333219051361, 'timestamp': '2025-10-01 04:14:09.088186', 'step': 1354, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:09.120542', 'step': 1354, 'epoch': 1} {'type': 'loss', 'content': 0.3117137551307678, 'timestamp': '2025-10-01 04:14:09.122595', 'step': 1355, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:09.154347', 'step': 1355, 'epoch': 1} {'type': 'loss', 'content': 0.1411973387002945, 'timestamp': '2025-10-01 04:14:09.177920', 'step': 1356, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:14:09.221297', 'step': 1356, 'epoch': 1} {'type': 'loss', 'content': 0.1913304477930069, 'timestamp': '2025-10-01 04:14:09.223498', 'step': 1357, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:09.264029', 'step': 1357, 'epoch': 1} {'type': 'loss', 'content': 0.28921228647232056, 'timestamp': '2025-10-01 04:14:09.266190', 'step': 1358, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:14:09.299629', 'step': 1358, 'epoch': 1} {'type': 'loss', 'content': 0.2404973804950714, 'timestamp': '2025-10-01 04:14:09.302051', 'step': 1359, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:09.335150', 'step': 1359, 'epoch': 1} {'type': 'loss', 'content': 0.19545719027519226, 'timestamp': '2025-10-01 04:14:09.358823', 'step': 1360, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:09.391022', 'step': 1360, 'epoch': 1} {'type': 'loss', 'content': 0.20104198157787323, 'timestamp': '2025-10-01 04:14:09.393345', 'step': 1361, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:09.426698', 'step': 1361, 'epoch': 1} {'type': 'loss', 'content': 0.0946267694234848, 'timestamp': '2025-10-01 04:14:09.428726', 'step': 1362, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:14:09.464408', 'step': 1362, 'epoch': 1} {'type': 'loss', 'content': 0.11867218464612961, 'timestamp': '2025-10-01 04:14:09.467407', 'step': 1363, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:09.508241', 'step': 1363, 'epoch': 1} {'type': 'loss', 'content': 0.2344781905412674, 'timestamp': '2025-10-01 04:14:09.531761', 'step': 1364, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:09.563941', 'step': 1364, 'epoch': 1} {'type': 'loss', 'content': 0.09501927345991135, 'timestamp': '2025-10-01 04:14:09.566224', 'step': 1365, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:14:09.601017', 'step': 1365, 'epoch': 1} {'type': 'loss', 'content': 0.13154254853725433, 'timestamp': '2025-10-01 04:14:09.603812', 'step': 1366, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:09.636167', 'step': 1366, 'epoch': 1} {'type': 'loss', 'content': 0.1840100884437561, 'timestamp': '2025-10-01 04:14:09.638499', 'step': 1367, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:09.675335', 'step': 1367, 'epoch': 1} {'type': 'loss', 'content': 0.08729643374681473, 'timestamp': '2025-10-01 04:14:09.699066', 'step': 1368, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:09.733269', 'step': 1368, 'epoch': 1} {'type': 'loss', 'content': 0.18857261538505554, 'timestamp': '2025-10-01 04:14:09.735780', 'step': 1369, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:14:09.776426', 'step': 1369, 'epoch': 1} {'type': 'loss', 'content': 0.17243044078350067, 'timestamp': '2025-10-01 04:14:09.779126', 'step': 1370, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:09.812425', 'step': 1370, 'epoch': 1} {'type': 'loss', 'content': 0.19924727082252502, 'timestamp': '2025-10-01 04:14:09.814853', 'step': 1371, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:14:09.860673', 'step': 1371, 'epoch': 1} {'type': 'loss', 'content': 0.13001157343387604, 'timestamp': '2025-10-01 04:14:09.884344', 'step': 1372, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:09.936309', 'step': 1372, 'epoch': 1} {'type': 'loss', 'content': 0.080281563103199, 'timestamp': '2025-10-01 04:14:09.938512', 'step': 1373, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:09.983000', 'step': 1373, 'epoch': 1} {'type': 'loss', 'content': 0.2061789184808731, 'timestamp': '2025-10-01 04:14:09.985116', 'step': 1374, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:14:10.017476', 'step': 1374, 'epoch': 1} {'type': 'loss', 'content': 0.1552032083272934, 'timestamp': '2025-10-01 04:14:10.020421', 'step': 1375, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:10.056574', 'step': 1375, 'epoch': 1} {'type': 'loss', 'content': 0.232905775308609, 'timestamp': '2025-10-01 04:14:10.080319', 'step': 1376, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:14:10.116702', 'step': 1376, 'epoch': 1} {'type': 'loss', 'content': 0.16927866637706757, 'timestamp': '2025-10-01 04:14:10.118694', 'step': 1377, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:10.153707', 'step': 1377, 'epoch': 1} {'type': 'loss', 'content': 0.20870782434940338, 'timestamp': '2025-10-01 04:14:10.155892', 'step': 1378, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:14:10.190434', 'step': 1378, 'epoch': 1} {'type': 'loss', 'content': 0.13802681863307953, 'timestamp': '2025-10-01 04:14:10.192326', 'step': 1379, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:10.227328', 'step': 1379, 'epoch': 1} {'type': 'loss', 'content': 0.15733245015144348, 'timestamp': '2025-10-01 04:14:10.250742', 'step': 1380, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:14:10.283574', 'step': 1380, 'epoch': 1} {'type': 'loss', 'content': 0.165176123380661, 'timestamp': '2025-10-01 04:14:10.285483', 'step': 1381, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:14:10.318522', 'step': 1381, 'epoch': 1} {'type': 'loss', 'content': 0.13485002517700195, 'timestamp': '2025-10-01 04:14:10.320556', 'step': 1382, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:10.353296', 'step': 1382, 'epoch': 1} {'type': 'loss', 'content': 0.234217569231987, 'timestamp': '2025-10-01 04:14:10.355253', 'step': 1383, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:10.389964', 'step': 1383, 'epoch': 1} {'type': 'loss', 'content': 0.14869186282157898, 'timestamp': '2025-10-01 04:14:10.413322', 'step': 1384, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:10.447161', 'step': 1384, 'epoch': 1} {'type': 'loss', 'content': 0.2330750823020935, 'timestamp': '2025-10-01 04:14:10.449516', 'step': 1385, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:14:10.484340', 'step': 1385, 'epoch': 1} {'type': 'loss', 'content': 0.18958020210266113, 'timestamp': '2025-10-01 04:14:10.486680', 'step': 1386, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:14:10.521341', 'step': 1386, 'epoch': 1} {'type': 'loss', 'content': 0.16224175691604614, 'timestamp': '2025-10-01 04:14:10.523379', 'step': 1387, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:10.556228', 'step': 1387, 'epoch': 1} {'type': 'loss', 'content': 0.18904699385166168, 'timestamp': '2025-10-01 04:14:10.579755', 'step': 1388, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:10.613514', 'step': 1388, 'epoch': 1} {'type': 'loss', 'content': 0.19675561785697937, 'timestamp': '2025-10-01 04:14:10.615367', 'step': 1389, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:10.657626', 'step': 1389, 'epoch': 1} {'type': 'loss', 'content': 0.16477420926094055, 'timestamp': '2025-10-01 04:14:10.659621', 'step': 1390, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:14:10.700424', 'step': 1390, 'epoch': 1} {'type': 'loss', 'content': 0.20223088562488556, 'timestamp': '2025-10-01 04:14:10.703026', 'step': 1391, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:14:10.738621', 'step': 1391, 'epoch': 1} {'type': 'loss', 'content': 0.0736425444483757, 'timestamp': '2025-10-01 04:14:10.762096', 'step': 1392, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:10.797175', 'step': 1392, 'epoch': 1} {'type': 'loss', 'content': 0.22080369293689728, 'timestamp': '2025-10-01 04:14:10.799558', 'step': 1393, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:10.833860', 'step': 1393, 'epoch': 1} {'type': 'loss', 'content': 0.17005428671836853, 'timestamp': '2025-10-01 04:14:10.836001', 'step': 1394, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:10.920698', 'step': 1394, 'epoch': 1} {'type': 'loss', 'content': 0.12855789065361023, 'timestamp': '2025-10-01 04:14:10.922630', 'step': 1395, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:10.956301', 'step': 1395, 'epoch': 1} {'type': 'loss', 'content': 0.25771564245224, 'timestamp': '2025-10-01 04:14:10.979774', 'step': 1396, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:14:11.035289', 'step': 1396, 'epoch': 1} {'type': 'loss', 'content': 0.18582287430763245, 'timestamp': '2025-10-01 04:14:11.037089', 'step': 1397, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:11.070406', 'step': 1397, 'epoch': 1} {'type': 'loss', 'content': 0.18987511098384857, 'timestamp': '2025-10-01 04:14:11.072271', 'step': 1398, 'epoch': 1} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 949202279808}], 'timestamp': '2025-10-01 04:14:21.926602', 'step': 1398, 'epoch': 1} {'type': 'pplx', 'content': 8114.183848916305, 'timestamp': '2025-10-01 04:14:21.929545', 'step': 1398, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:21.959192', 'step': 1398, 'epoch': 1} {'type': 'loss', 'content': 0.24352335929870605, 'timestamp': '2025-10-01 04:14:21.961376', 'step': 1399, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:14:21.992747', 'step': 1399, 'epoch': 1} {'type': 'loss', 'content': 0.1493397057056427, 'timestamp': '2025-10-01 04:14:22.016286', 'step': 1400, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:14:22.053769', 'step': 1400, 'epoch': 1} {'type': 'loss', 'content': 0.22232936322689056, 'timestamp': '2025-10-01 04:14:22.055755', 'step': 1401, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:14:22.086595', 'step': 1401, 'epoch': 1} {'type': 'loss', 'content': 0.21038450300693512, 'timestamp': '2025-10-01 04:14:22.088413', 'step': 1402, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:14:22.119126', 'step': 1402, 'epoch': 1} {'type': 'loss', 'content': 0.17499659955501556, 'timestamp': '2025-10-01 04:14:22.121018', 'step': 1403, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:22.152562', 'step': 1403, 'epoch': 1} {'type': 'loss', 'content': 0.16395623981952667, 'timestamp': '2025-10-01 04:14:22.176098', 'step': 1404, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:14:22.208217', 'step': 1404, 'epoch': 1} {'type': 'loss', 'content': 0.17372800409793854, 'timestamp': '2025-10-01 04:14:22.210122', 'step': 1405, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:14:22.242546', 'step': 1405, 'epoch': 1} {'type': 'loss', 'content': 0.14520540833473206, 'timestamp': '2025-10-01 04:14:22.245401', 'step': 1406, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:14:22.279377', 'step': 1406, 'epoch': 1} {'type': 'loss', 'content': 0.13536472618579865, 'timestamp': '2025-10-01 04:14:22.281829', 'step': 1407, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:14:22.313582', 'step': 1407, 'epoch': 1} {'type': 'loss', 'content': 0.2506318986415863, 'timestamp': '2025-10-01 04:14:22.336998', 'step': 1408, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:22.368275', 'step': 1408, 'epoch': 1} {'type': 'loss', 'content': 0.15551239252090454, 'timestamp': '2025-10-01 04:14:22.370827', 'step': 1409, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:22.403407', 'step': 1409, 'epoch': 1} {'type': 'loss', 'content': 0.1727120280265808, 'timestamp': '2025-10-01 04:14:22.405540', 'step': 1410, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:22.438250', 'step': 1410, 'epoch': 1} {'type': 'loss', 'content': 0.17066343128681183, 'timestamp': '2025-10-01 04:14:22.440362', 'step': 1411, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:22.472597', 'step': 1411, 'epoch': 1} {'type': 'loss', 'content': 0.12791438400745392, 'timestamp': '2025-10-01 04:14:22.496165', 'step': 1412, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:14:22.535035', 'step': 1412, 'epoch': 1} {'type': 'loss', 'content': 0.2223261296749115, 'timestamp': '2025-10-01 04:14:22.537415', 'step': 1413, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:14:22.569044', 'step': 1413, 'epoch': 1} {'type': 'loss', 'content': 0.19542168080806732, 'timestamp': '2025-10-01 04:14:22.571396', 'step': 1414, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:14:22.607431', 'step': 1414, 'epoch': 1} {'type': 'loss', 'content': 0.15285111963748932, 'timestamp': '2025-10-01 04:14:22.609807', 'step': 1415, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:14:22.642745', 'step': 1415, 'epoch': 1} {'type': 'loss', 'content': 0.09802786260843277, 'timestamp': '2025-10-01 04:14:22.666168', 'step': 1416, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:22.707323', 'step': 1416, 'epoch': 1} {'type': 'loss', 'content': 0.29867979884147644, 'timestamp': '2025-10-01 04:14:22.709404', 'step': 1417, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:14:22.745054', 'step': 1417, 'epoch': 1} {'type': 'loss', 'content': 0.18760885298252106, 'timestamp': '2025-10-01 04:14:22.748019', 'step': 1418, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:22.780621', 'step': 1418, 'epoch': 1} {'type': 'loss', 'content': 0.21920153498649597, 'timestamp': '2025-10-01 04:14:22.782639', 'step': 1419, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:14:22.813095', 'step': 1419, 'epoch': 1} {'type': 'loss', 'content': 0.2771911919116974, 'timestamp': '2025-10-01 04:14:22.836553', 'step': 1420, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:14:22.876145', 'step': 1420, 'epoch': 1} {'type': 'loss', 'content': 0.15355831384658813, 'timestamp': '2025-10-01 04:14:22.879223', 'step': 1421, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:14:22.911257', 'step': 1421, 'epoch': 1} {'type': 'loss', 'content': 0.1713251769542694, 'timestamp': '2025-10-01 04:14:22.914205', 'step': 1422, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:22.948456', 'step': 1422, 'epoch': 1} {'type': 'loss', 'content': 0.13368940353393555, 'timestamp': '2025-10-01 04:14:22.959538', 'step': 1423, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:22.991103', 'step': 1423, 'epoch': 1} {'type': 'loss', 'content': 0.2068118005990982, 'timestamp': '2025-10-01 04:14:23.014755', 'step': 1424, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:23.047597', 'step': 1424, 'epoch': 1} {'type': 'loss', 'content': 0.1643415093421936, 'timestamp': '2025-10-01 04:14:23.051043', 'step': 1425, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:14:23.090977', 'step': 1425, 'epoch': 1} {'type': 'loss', 'content': 0.2596958577632904, 'timestamp': '2025-10-01 04:14:23.093218', 'step': 1426, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:14:23.134264', 'step': 1426, 'epoch': 1} {'type': 'loss', 'content': 0.20337671041488647, 'timestamp': '2025-10-01 04:14:23.137074', 'step': 1427, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:23.171085', 'step': 1427, 'epoch': 1} {'type': 'loss', 'content': 0.16672824323177338, 'timestamp': '2025-10-01 04:14:23.194637', 'step': 1428, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:23.228637', 'step': 1428, 'epoch': 1} {'type': 'loss', 'content': 0.1621081531047821, 'timestamp': '2025-10-01 04:14:23.230890', 'step': 1429, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:14:23.266462', 'step': 1429, 'epoch': 1} {'type': 'loss', 'content': 0.19164252281188965, 'timestamp': '2025-10-01 04:14:23.268527', 'step': 1430, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:23.304981', 'step': 1430, 'epoch': 1} {'type': 'loss', 'content': 0.2596864700317383, 'timestamp': '2025-10-01 04:14:23.306999', 'step': 1431, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:14:23.339213', 'step': 1431, 'epoch': 1} {'type': 'loss', 'content': 0.21568837761878967, 'timestamp': '2025-10-01 04:14:23.362640', 'step': 1432, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:23.393899', 'step': 1432, 'epoch': 1} {'type': 'loss', 'content': 0.14117228984832764, 'timestamp': '2025-10-01 04:14:23.395971', 'step': 1433, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:14:23.427629', 'step': 1433, 'epoch': 1} {'type': 'loss', 'content': 0.29177695512771606, 'timestamp': '2025-10-01 04:14:23.430200', 'step': 1434, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:23.461906', 'step': 1434, 'epoch': 1} {'type': 'loss', 'content': 0.12532813847064972, 'timestamp': '2025-10-01 04:14:23.463868', 'step': 1435, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:23.505241', 'step': 1435, 'epoch': 1} {'type': 'loss', 'content': 0.13768905401229858, 'timestamp': '2025-10-01 04:14:23.528701', 'step': 1436, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:23.570361', 'step': 1436, 'epoch': 1} {'type': 'loss', 'content': 0.20662716031074524, 'timestamp': '2025-10-01 04:14:23.572294', 'step': 1437, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:23.603351', 'step': 1437, 'epoch': 1} {'type': 'loss', 'content': 0.16170288622379303, 'timestamp': '2025-10-01 04:14:23.605243', 'step': 1438, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:14:23.645871', 'step': 1438, 'epoch': 1} {'type': 'loss', 'content': 0.19615612924098969, 'timestamp': '2025-10-01 04:14:23.648845', 'step': 1439, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:14:23.687555', 'step': 1439, 'epoch': 1} {'type': 'loss', 'content': 0.19072631001472473, 'timestamp': '2025-10-01 04:14:23.711019', 'step': 1440, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:23.744136', 'step': 1440, 'epoch': 1} {'type': 'loss', 'content': 0.14098365604877472, 'timestamp': '2025-10-01 04:14:23.751410', 'step': 1441, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:23.789058', 'step': 1441, 'epoch': 1} {'type': 'loss', 'content': 0.2444400191307068, 'timestamp': '2025-10-01 04:14:23.790956', 'step': 1442, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:23.829440', 'step': 1442, 'epoch': 1} {'type': 'loss', 'content': 0.18602722883224487, 'timestamp': '2025-10-01 04:14:23.831375', 'step': 1443, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:23.869534', 'step': 1443, 'epoch': 1} {'type': 'loss', 'content': 0.2709487974643707, 'timestamp': '2025-10-01 04:14:23.892985', 'step': 1444, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:23.924307', 'step': 1444, 'epoch': 1} {'type': 'loss', 'content': 0.3460521101951599, 'timestamp': '2025-10-01 04:14:23.926679', 'step': 1445, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:14:23.967004', 'step': 1445, 'epoch': 1} {'type': 'loss', 'content': 0.1422712802886963, 'timestamp': '2025-10-01 04:14:23.969050', 'step': 1446, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:14:24.009425', 'step': 1446, 'epoch': 1} {'type': 'loss', 'content': 0.15272025763988495, 'timestamp': '2025-10-01 04:14:24.011426', 'step': 1447, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:24.042363', 'step': 1447, 'epoch': 1} {'type': 'loss', 'content': 0.252771258354187, 'timestamp': '2025-10-01 04:14:24.065777', 'step': 1448, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:24.097838', 'step': 1448, 'epoch': 1} {'type': 'loss', 'content': 0.16269996762275696, 'timestamp': '2025-10-01 04:14:24.099758', 'step': 1449, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:14:24.131517', 'step': 1449, 'epoch': 1} {'type': 'loss', 'content': 0.21977204084396362, 'timestamp': '2025-10-01 04:14:24.133441', 'step': 1450, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:24.165428', 'step': 1450, 'epoch': 1} {'type': 'loss', 'content': 0.26314958930015564, 'timestamp': '2025-10-01 04:14:24.167358', 'step': 1451, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:24.198276', 'step': 1451, 'epoch': 1} {'type': 'loss', 'content': 0.16891157627105713, 'timestamp': '2025-10-01 04:14:24.221831', 'step': 1452, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:24.253791', 'step': 1452, 'epoch': 1} {'type': 'loss', 'content': 0.12855738401412964, 'timestamp': '2025-10-01 04:14:24.256743', 'step': 1453, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:24.288750', 'step': 1453, 'epoch': 1} {'type': 'loss', 'content': 0.10122869908809662, 'timestamp': '2025-10-01 04:14:24.290867', 'step': 1454, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:24.333550', 'step': 1454, 'epoch': 1} {'type': 'loss', 'content': 0.26886606216430664, 'timestamp': '2025-10-01 04:14:24.335820', 'step': 1455, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:24.372035', 'step': 1455, 'epoch': 1} {'type': 'loss', 'content': 0.1032840833067894, 'timestamp': '2025-10-01 04:14:24.395739', 'step': 1456, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:24.432933', 'step': 1456, 'epoch': 1} {'type': 'loss', 'content': 0.17723660171031952, 'timestamp': '2025-10-01 04:14:24.434865', 'step': 1457, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:24.476268', 'step': 1457, 'epoch': 1} {'type': 'loss', 'content': 0.14680986106395721, 'timestamp': '2025-10-01 04:14:24.478228', 'step': 1458, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:24.509790', 'step': 1458, 'epoch': 1} {'type': 'loss', 'content': 0.18980371952056885, 'timestamp': '2025-10-01 04:14:24.511771', 'step': 1459, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:24.542998', 'step': 1459, 'epoch': 1} {'type': 'loss', 'content': 0.13116617500782013, 'timestamp': '2025-10-01 04:14:24.566683', 'step': 1460, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:14:24.597877', 'step': 1460, 'epoch': 1} {'type': 'loss', 'content': 0.18990524113178253, 'timestamp': '2025-10-01 04:14:24.599770', 'step': 1461, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:24.636080', 'step': 1461, 'epoch': 1} {'type': 'loss', 'content': 0.19159112870693207, 'timestamp': '2025-10-01 04:14:24.641525', 'step': 1462, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:24.683977', 'step': 1462, 'epoch': 1} {'type': 'loss', 'content': 0.28192591667175293, 'timestamp': '2025-10-01 04:14:24.685757', 'step': 1463, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:24.717687', 'step': 1463, 'epoch': 1} {'type': 'loss', 'content': 0.18468336760997772, 'timestamp': '2025-10-01 04:14:24.741471', 'step': 1464, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:14:24.774010', 'step': 1464, 'epoch': 1} {'type': 'loss', 'content': 0.2030111700296402, 'timestamp': '2025-10-01 04:14:24.775975', 'step': 1465, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:24.809601', 'step': 1465, 'epoch': 1} {'type': 'loss', 'content': 0.1986122727394104, 'timestamp': '2025-10-01 04:14:24.811648', 'step': 1466, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:24.843893', 'step': 1466, 'epoch': 1} {'type': 'loss', 'content': 0.15038952231407166, 'timestamp': '2025-10-01 04:14:24.845888', 'step': 1467, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:24.883918', 'step': 1467, 'epoch': 1} {'type': 'loss', 'content': 0.2371099442243576, 'timestamp': '2025-10-01 04:14:24.907882', 'step': 1468, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:24.939437', 'step': 1468, 'epoch': 1} {'type': 'loss', 'content': 0.32215991616249084, 'timestamp': '2025-10-01 04:14:24.941457', 'step': 1469, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:24.973337', 'step': 1469, 'epoch': 1} {'type': 'loss', 'content': 0.24200811982154846, 'timestamp': '2025-10-01 04:14:24.975977', 'step': 1470, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:14:25.008640', 'step': 1470, 'epoch': 1} {'type': 'loss', 'content': 0.16426263749599457, 'timestamp': '2025-10-01 04:14:25.010885', 'step': 1471, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:25.049606', 'step': 1471, 'epoch': 1} {'type': 'loss', 'content': 0.13276833295822144, 'timestamp': '2025-10-01 04:14:25.073535', 'step': 1472, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:25.111318', 'step': 1472, 'epoch': 1} {'type': 'loss', 'content': 0.12457174062728882, 'timestamp': '2025-10-01 04:14:25.113319', 'step': 1473, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:14:25.158410', 'step': 1473, 'epoch': 1} {'type': 'loss', 'content': 0.2021128535270691, 'timestamp': '2025-10-01 04:14:25.160550', 'step': 1474, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:14:25.199152', 'step': 1474, 'epoch': 1} {'type': 'loss', 'content': 0.14651525020599365, 'timestamp': '2025-10-01 04:14:25.201180', 'step': 1475, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:25.238131', 'step': 1475, 'epoch': 1} {'type': 'loss', 'content': 0.14846597611904144, 'timestamp': '2025-10-01 04:14:25.261820', 'step': 1476, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:25.293717', 'step': 1476, 'epoch': 1} {'type': 'loss', 'content': 0.21692128479480743, 'timestamp': '2025-10-01 04:14:25.295975', 'step': 1477, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:14:25.330366', 'step': 1477, 'epoch': 1} {'type': 'loss', 'content': 0.07707171142101288, 'timestamp': '2025-10-01 04:14:25.332514', 'step': 1478, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:25.365364', 'step': 1478, 'epoch': 1} {'type': 'loss', 'content': 0.22497572004795074, 'timestamp': '2025-10-01 04:14:25.367382', 'step': 1479, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:25.401979', 'step': 1479, 'epoch': 1} {'type': 'loss', 'content': 0.1904914677143097, 'timestamp': '2025-10-01 04:14:25.425749', 'step': 1480, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:14:25.456980', 'step': 1480, 'epoch': 1} {'type': 'loss', 'content': 0.11595246940851212, 'timestamp': '2025-10-01 04:14:25.459078', 'step': 1481, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:25.494295', 'step': 1481, 'epoch': 1} {'type': 'loss', 'content': 0.18432985246181488, 'timestamp': '2025-10-01 04:14:25.496671', 'step': 1482, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:14:25.533978', 'step': 1482, 'epoch': 1} {'type': 'loss', 'content': 0.15783409774303436, 'timestamp': '2025-10-01 04:14:25.536416', 'step': 1483, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:25.569297', 'step': 1483, 'epoch': 1} {'type': 'loss', 'content': 0.13051745295524597, 'timestamp': '2025-10-01 04:14:25.593127', 'step': 1484, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:25.626347', 'step': 1484, 'epoch': 1} {'type': 'loss', 'content': 0.2588050663471222, 'timestamp': '2025-10-01 04:14:25.628988', 'step': 1485, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:25.664307', 'step': 1485, 'epoch': 1} {'type': 'loss', 'content': 0.19255316257476807, 'timestamp': '2025-10-01 04:14:25.667859', 'step': 1486, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:14:25.702771', 'step': 1486, 'epoch': 1} {'type': 'loss', 'content': 0.1262376755475998, 'timestamp': '2025-10-01 04:14:25.705197', 'step': 1487, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:14:25.738928', 'step': 1487, 'epoch': 1} {'type': 'loss', 'content': 0.16302096843719482, 'timestamp': '2025-10-01 04:14:25.762761', 'step': 1488, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:14:25.806432', 'step': 1488, 'epoch': 1} {'type': 'loss', 'content': 0.2008105367422104, 'timestamp': '2025-10-01 04:14:25.808549', 'step': 1489, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:14:25.842107', 'step': 1489, 'epoch': 1} {'type': 'loss', 'content': 0.21564990282058716, 'timestamp': '2025-10-01 04:14:25.844180', 'step': 1490, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:14:25.878915', 'step': 1490, 'epoch': 1} {'type': 'loss', 'content': 0.16359518468379974, 'timestamp': '2025-10-01 04:14:25.881137', 'step': 1491, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:25.912235', 'step': 1491, 'epoch': 1} {'type': 'loss', 'content': 0.18799296021461487, 'timestamp': '2025-10-01 04:14:25.936015', 'step': 1492, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:25.969520', 'step': 1492, 'epoch': 1} {'type': 'loss', 'content': 0.26544392108917236, 'timestamp': '2025-10-01 04:14:25.971768', 'step': 1493, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:26.008708', 'step': 1493, 'epoch': 1} {'type': 'loss', 'content': 0.1586405336856842, 'timestamp': '2025-10-01 04:14:26.011004', 'step': 1494, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:14:26.047277', 'step': 1494, 'epoch': 1} {'type': 'loss', 'content': 0.19591164588928223, 'timestamp': '2025-10-01 04:14:26.049605', 'step': 1495, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:26.086976', 'step': 1495, 'epoch': 1} {'type': 'loss', 'content': 0.3287150263786316, 'timestamp': '2025-10-01 04:14:26.110456', 'step': 1496, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:26.144028', 'step': 1496, 'epoch': 1} {'type': 'loss', 'content': 0.1726025938987732, 'timestamp': '2025-10-01 04:14:26.146213', 'step': 1497, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:14:26.178551', 'step': 1497, 'epoch': 1} {'type': 'loss', 'content': 0.13835453987121582, 'timestamp': '2025-10-01 04:14:26.181035', 'step': 1498, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:14:26.219548', 'step': 1498, 'epoch': 1} {'type': 'loss', 'content': 0.11671740561723709, 'timestamp': '2025-10-01 04:14:26.221867', 'step': 1499, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:26.260597', 'step': 1499, 'epoch': 1} {'type': 'loss', 'content': 0.19145303964614868, 'timestamp': '2025-10-01 04:14:26.284150', 'step': 1500, 'epoch': 1} {'type': 'info', 'content': 'Checkpoint saved at step 1500', 'timestamp': '2025-10-01 04:14:31.194936', 'step': 1500, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:31.228362', 'step': 1500, 'epoch': 1} {'type': 'loss', 'content': 0.20712298154830933, 'timestamp': '2025-10-01 04:14:31.230591', 'step': 1501, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:31.270458', 'step': 1501, 'epoch': 1} {'type': 'loss', 'content': 0.3058948218822479, 'timestamp': '2025-10-01 04:14:31.272407', 'step': 1502, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:31.308691', 'step': 1502, 'epoch': 1} {'type': 'loss', 'content': 0.16287004947662354, 'timestamp': '2025-10-01 04:14:31.314268', 'step': 1503, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:31.356527', 'step': 1503, 'epoch': 1} {'type': 'loss', 'content': 0.12193859368562698, 'timestamp': '2025-10-01 04:14:31.385275', 'step': 1504, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:31.429624', 'step': 1504, 'epoch': 1} {'type': 'loss', 'content': 0.1789969652891159, 'timestamp': '2025-10-01 04:14:31.432650', 'step': 1505, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:31.478628', 'step': 1505, 'epoch': 1} {'type': 'loss', 'content': 0.17909973859786987, 'timestamp': '2025-10-01 04:14:31.481544', 'step': 1506, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:31.514158', 'step': 1506, 'epoch': 1} {'type': 'loss', 'content': 0.20114383101463318, 'timestamp': '2025-10-01 04:14:31.516686', 'step': 1507, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:31.550220', 'step': 1507, 'epoch': 1} {'type': 'loss', 'content': 0.16807329654693604, 'timestamp': '2025-10-01 04:14:31.575215', 'step': 1508, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:14:31.617341', 'step': 1508, 'epoch': 1} {'type': 'loss', 'content': 0.15882399678230286, 'timestamp': '2025-10-01 04:14:31.625432', 'step': 1509, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:31.664598', 'step': 1509, 'epoch': 1} {'type': 'loss', 'content': 0.20228992402553558, 'timestamp': '2025-10-01 04:14:31.670865', 'step': 1510, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:14:31.726969', 'step': 1510, 'epoch': 1} {'type': 'loss', 'content': 0.15480130910873413, 'timestamp': '2025-10-01 04:14:31.731739', 'step': 1511, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:31.777816', 'step': 1511, 'epoch': 1} {'type': 'loss', 'content': 0.18026167154312134, 'timestamp': '2025-10-01 04:14:31.804433', 'step': 1512, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:31.856876', 'step': 1512, 'epoch': 1} {'type': 'loss', 'content': 0.16304247081279755, 'timestamp': '2025-10-01 04:14:31.863864', 'step': 1513, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:31.905841', 'step': 1513, 'epoch': 1} {'type': 'loss', 'content': 0.3698572814464569, 'timestamp': '2025-10-01 04:14:31.908157', 'step': 1514, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:14:31.952614', 'step': 1514, 'epoch': 1} {'type': 'loss', 'content': 0.24161967635154724, 'timestamp': '2025-10-01 04:14:31.964629', 'step': 1515, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:32.023404', 'step': 1515, 'epoch': 1} {'type': 'loss', 'content': 0.18408100306987762, 'timestamp': '2025-10-01 04:14:32.053832', 'step': 1516, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:32.088235', 'step': 1516, 'epoch': 1} {'type': 'loss', 'content': 0.15615451335906982, 'timestamp': '2025-10-01 04:14:32.093156', 'step': 1517, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:32.128321', 'step': 1517, 'epoch': 1} {'type': 'loss', 'content': 0.2177673578262329, 'timestamp': '2025-10-01 04:14:32.137376', 'step': 1518, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:14:32.181427', 'step': 1518, 'epoch': 1} {'type': 'loss', 'content': 0.34341195225715637, 'timestamp': '2025-10-01 04:14:32.195211', 'step': 1519, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:32.246944', 'step': 1519, 'epoch': 1} {'type': 'loss', 'content': 0.09900472313165665, 'timestamp': '2025-10-01 04:14:32.275941', 'step': 1520, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:32.309632', 'step': 1520, 'epoch': 1} {'type': 'loss', 'content': 0.05586807429790497, 'timestamp': '2025-10-01 04:14:32.320567', 'step': 1521, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:14:32.355284', 'step': 1521, 'epoch': 1} {'type': 'loss', 'content': 0.19024544954299927, 'timestamp': '2025-10-01 04:14:32.358375', 'step': 1522, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:14:32.392448', 'step': 1522, 'epoch': 1} {'type': 'loss', 'content': 0.19293130934238434, 'timestamp': '2025-10-01 04:14:32.415283', 'step': 1523, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:14:32.451336', 'step': 1523, 'epoch': 1} {'type': 'loss', 'content': 0.20995189249515533, 'timestamp': '2025-10-01 04:14:32.483875', 'step': 1524, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:32.524391', 'step': 1524, 'epoch': 1} {'type': 'loss', 'content': 0.21077574789524078, 'timestamp': '2025-10-01 04:14:32.531680', 'step': 1525, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:14:32.576887', 'step': 1525, 'epoch': 1} {'type': 'loss', 'content': 0.20962250232696533, 'timestamp': '2025-10-01 04:14:32.578987', 'step': 1526, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:32.619089', 'step': 1526, 'epoch': 1} {'type': 'loss', 'content': 0.2343432754278183, 'timestamp': '2025-10-01 04:14:32.621224', 'step': 1527, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:14:32.666911', 'step': 1527, 'epoch': 1} {'type': 'loss', 'content': 0.12459657341241837, 'timestamp': '2025-10-01 04:14:32.693297', 'step': 1528, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:32.743464', 'step': 1528, 'epoch': 1} {'type': 'loss', 'content': 0.20474010705947876, 'timestamp': '2025-10-01 04:14:32.745998', 'step': 1529, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:32.787918', 'step': 1529, 'epoch': 1} {'type': 'loss', 'content': 0.1462458372116089, 'timestamp': '2025-10-01 04:14:32.791977', 'step': 1530, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:32.834191', 'step': 1530, 'epoch': 1} {'type': 'loss', 'content': 0.1505652368068695, 'timestamp': '2025-10-01 04:14:32.843979', 'step': 1531, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:32.890162', 'step': 1531, 'epoch': 1} {'type': 'loss', 'content': 0.21639931201934814, 'timestamp': '2025-10-01 04:14:32.914676', 'step': 1532, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:14:32.954456', 'step': 1532, 'epoch': 1} {'type': 'loss', 'content': 0.16332454979419708, 'timestamp': '2025-10-01 04:14:32.963865', 'step': 1533, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:33.017962', 'step': 1533, 'epoch': 1} {'type': 'loss', 'content': 0.14644581079483032, 'timestamp': '2025-10-01 04:14:33.019961', 'step': 1534, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:33.053744', 'step': 1534, 'epoch': 1} {'type': 'loss', 'content': 0.25243762135505676, 'timestamp': '2025-10-01 04:14:33.057999', 'step': 1535, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:14:33.098509', 'step': 1535, 'epoch': 1} {'type': 'loss', 'content': 0.0964394360780716, 'timestamp': '2025-10-01 04:14:33.125200', 'step': 1536, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:33.161316', 'step': 1536, 'epoch': 1} {'type': 'loss', 'content': 0.13117526471614838, 'timestamp': '2025-10-01 04:14:33.164920', 'step': 1537, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:14:33.200205', 'step': 1537, 'epoch': 1} {'type': 'loss', 'content': 0.2750139832496643, 'timestamp': '2025-10-01 04:14:33.203421', 'step': 1538, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:33.240016', 'step': 1538, 'epoch': 1} {'type': 'loss', 'content': 0.18683314323425293, 'timestamp': '2025-10-01 04:14:33.242684', 'step': 1539, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:14:33.275974', 'step': 1539, 'epoch': 1} {'type': 'loss', 'content': 0.1890128254890442, 'timestamp': '2025-10-01 04:14:33.313678', 'step': 1540, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:33.359681', 'step': 1540, 'epoch': 1} {'type': 'loss', 'content': 0.25192296504974365, 'timestamp': '2025-10-01 04:14:33.362767', 'step': 1541, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:14:33.402888', 'step': 1541, 'epoch': 1} {'type': 'loss', 'content': 0.2217671126127243, 'timestamp': '2025-10-01 04:14:33.405497', 'step': 1542, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:33.441100', 'step': 1542, 'epoch': 1} {'type': 'loss', 'content': 0.10090997070074081, 'timestamp': '2025-10-01 04:14:33.443847', 'step': 1543, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:33.489463', 'step': 1543, 'epoch': 1} {'type': 'loss', 'content': 0.12339116632938385, 'timestamp': '2025-10-01 04:14:33.513491', 'step': 1544, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:33.558202', 'step': 1544, 'epoch': 1} {'type': 'loss', 'content': 0.3111255466938019, 'timestamp': '2025-10-01 04:14:33.573924', 'step': 1545, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:33.609518', 'step': 1545, 'epoch': 1} {'type': 'loss', 'content': 0.1286955177783966, 'timestamp': '2025-10-01 04:14:33.615661', 'step': 1546, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:33.649981', 'step': 1546, 'epoch': 1} {'type': 'loss', 'content': 0.1035626232624054, 'timestamp': '2025-10-01 04:14:33.662853', 'step': 1547, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:14:33.697911', 'step': 1547, 'epoch': 1} {'type': 'loss', 'content': 0.19961294531822205, 'timestamp': '2025-10-01 04:14:33.725462', 'step': 1548, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:33.765980', 'step': 1548, 'epoch': 1} {'type': 'loss', 'content': 0.1700415015220642, 'timestamp': '2025-10-01 04:14:33.768182', 'step': 1549, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:33.803545', 'step': 1549, 'epoch': 1} {'type': 'loss', 'content': 0.12257273495197296, 'timestamp': '2025-10-01 04:14:33.805982', 'step': 1550, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:14:33.846191', 'step': 1550, 'epoch': 1} {'type': 'loss', 'content': 0.14262449741363525, 'timestamp': '2025-10-01 04:14:33.848692', 'step': 1551, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:33.881958', 'step': 1551, 'epoch': 1} {'type': 'loss', 'content': 0.10527145117521286, 'timestamp': '2025-10-01 04:14:33.905683', 'step': 1552, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:33.938091', 'step': 1552, 'epoch': 1} {'type': 'loss', 'content': 0.18262997269630432, 'timestamp': '2025-10-01 04:14:33.940217', 'step': 1553, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:33.974871', 'step': 1553, 'epoch': 1} {'type': 'loss', 'content': 0.17977413535118103, 'timestamp': '2025-10-01 04:14:33.977162', 'step': 1554, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:34.012374', 'step': 1554, 'epoch': 1} {'type': 'loss', 'content': 0.3031778037548065, 'timestamp': '2025-10-01 04:14:34.014473', 'step': 1555, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:34.048610', 'step': 1555, 'epoch': 1} {'type': 'loss', 'content': 0.18653082847595215, 'timestamp': '2025-10-01 04:14:34.072571', 'step': 1556, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:34.106454', 'step': 1556, 'epoch': 1} {'type': 'loss', 'content': 0.1910366714000702, 'timestamp': '2025-10-01 04:14:34.108731', 'step': 1557, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:34.142391', 'step': 1557, 'epoch': 1} {'type': 'loss', 'content': 0.13492834568023682, 'timestamp': '2025-10-01 04:14:34.144282', 'step': 1558, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:14:34.180381', 'step': 1558, 'epoch': 1} {'type': 'loss', 'content': 0.19019129872322083, 'timestamp': '2025-10-01 04:14:34.183393', 'step': 1559, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:34.219370', 'step': 1559, 'epoch': 1} {'type': 'loss', 'content': 0.2103653848171234, 'timestamp': '2025-10-01 04:14:34.242983', 'step': 1560, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:34.279729', 'step': 1560, 'epoch': 1} {'type': 'loss', 'content': 0.21523542702198029, 'timestamp': '2025-10-01 04:14:34.281846', 'step': 1561, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:34.325547', 'step': 1561, 'epoch': 1} {'type': 'loss', 'content': 0.17683002352714539, 'timestamp': '2025-10-01 04:14:34.327670', 'step': 1562, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:14:34.364025', 'step': 1562, 'epoch': 1} {'type': 'loss', 'content': 0.19496116042137146, 'timestamp': '2025-10-01 04:14:34.366455', 'step': 1563, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:34.400699', 'step': 1563, 'epoch': 1} {'type': 'loss', 'content': 0.15875177085399628, 'timestamp': '2025-10-01 04:14:34.424120', 'step': 1564, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:34.458717', 'step': 1564, 'epoch': 1} {'type': 'loss', 'content': 0.1561087965965271, 'timestamp': '2025-10-01 04:14:34.460537', 'step': 1565, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:14:34.501122', 'step': 1565, 'epoch': 1} {'type': 'loss', 'content': 0.2384667545557022, 'timestamp': '2025-10-01 04:14:34.503868', 'step': 1566, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:14:34.537940', 'step': 1566, 'epoch': 1} {'type': 'loss', 'content': 0.10431293398141861, 'timestamp': '2025-10-01 04:14:34.540038', 'step': 1567, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:34.573609', 'step': 1567, 'epoch': 1} {'type': 'loss', 'content': 0.24721580743789673, 'timestamp': '2025-10-01 04:14:34.596988', 'step': 1568, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:14:34.640301', 'step': 1568, 'epoch': 1} {'type': 'loss', 'content': 0.14394356310367584, 'timestamp': '2025-10-01 04:14:34.642345', 'step': 1569, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:34.687546', 'step': 1569, 'epoch': 1} {'type': 'loss', 'content': 0.1966317594051361, 'timestamp': '2025-10-01 04:14:34.689456', 'step': 1570, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:14:34.725955', 'step': 1570, 'epoch': 1} {'type': 'loss', 'content': 0.1363602578639984, 'timestamp': '2025-10-01 04:14:34.727881', 'step': 1571, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:34.761686', 'step': 1571, 'epoch': 1} {'type': 'loss', 'content': 0.304894357919693, 'timestamp': '2025-10-01 04:14:34.786721', 'step': 1572, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:14:34.820824', 'step': 1572, 'epoch': 1} {'type': 'loss', 'content': 0.14652493596076965, 'timestamp': '2025-10-01 04:14:34.822783', 'step': 1573, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:14:34.857237', 'step': 1573, 'epoch': 1} {'type': 'loss', 'content': 0.09850817173719406, 'timestamp': '2025-10-01 04:14:34.861618', 'step': 1574, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:34.903856', 'step': 1574, 'epoch': 1} {'type': 'loss', 'content': 0.20121130347251892, 'timestamp': '2025-10-01 04:14:34.905760', 'step': 1575, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:34.945734', 'step': 1575, 'epoch': 1} {'type': 'loss', 'content': 0.19367845356464386, 'timestamp': '2025-10-01 04:14:34.969097', 'step': 1576, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:35.004001', 'step': 1576, 'epoch': 1} {'type': 'loss', 'content': 0.18163937330245972, 'timestamp': '2025-10-01 04:14:35.005770', 'step': 1577, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:35.037826', 'step': 1577, 'epoch': 1} {'type': 'loss', 'content': 0.14870086312294006, 'timestamp': '2025-10-01 04:14:35.039581', 'step': 1578, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:35.074111', 'step': 1578, 'epoch': 1} {'type': 'loss', 'content': 0.1585649996995926, 'timestamp': '2025-10-01 04:14:35.075916', 'step': 1579, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:35.109545', 'step': 1579, 'epoch': 1} {'type': 'loss', 'content': 0.2057134509086609, 'timestamp': '2025-10-01 04:14:35.133000', 'step': 1580, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:14:35.170532', 'step': 1580, 'epoch': 1} {'type': 'loss', 'content': 0.13917772471904755, 'timestamp': '2025-10-01 04:14:35.172385', 'step': 1581, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:14:35.204592', 'step': 1581, 'epoch': 1} {'type': 'loss', 'content': 0.20986241102218628, 'timestamp': '2025-10-01 04:14:35.206622', 'step': 1582, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:14:35.237988', 'step': 1582, 'epoch': 1} {'type': 'loss', 'content': 0.2253739833831787, 'timestamp': '2025-10-01 04:14:35.240036', 'step': 1583, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:14:35.272434', 'step': 1583, 'epoch': 1} {'type': 'loss', 'content': 0.17615428566932678, 'timestamp': '2025-10-01 04:14:35.295883', 'step': 1584, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:35.332641', 'step': 1584, 'epoch': 1} {'type': 'loss', 'content': 0.1485094577074051, 'timestamp': '2025-10-01 04:14:35.334824', 'step': 1585, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:35.369060', 'step': 1585, 'epoch': 1} {'type': 'loss', 'content': 0.14579930901527405, 'timestamp': '2025-10-01 04:14:35.371204', 'step': 1586, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:35.409749', 'step': 1586, 'epoch': 1} {'type': 'loss', 'content': 0.27921226620674133, 'timestamp': '2025-10-01 04:14:35.411741', 'step': 1587, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:14:35.443242', 'step': 1587, 'epoch': 1} {'type': 'loss', 'content': 0.16399355232715607, 'timestamp': '2025-10-01 04:14:35.466698', 'step': 1588, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:35.499516', 'step': 1588, 'epoch': 1} {'type': 'loss', 'content': 0.12825384736061096, 'timestamp': '2025-10-01 04:14:35.501575', 'step': 1589, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:14:35.533924', 'step': 1589, 'epoch': 1} {'type': 'loss', 'content': 0.22010450065135956, 'timestamp': '2025-10-01 04:14:35.535758', 'step': 1590, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:14:35.568435', 'step': 1590, 'epoch': 1} {'type': 'loss', 'content': 0.1741754114627838, 'timestamp': '2025-10-01 04:14:35.570429', 'step': 1591, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:35.608297', 'step': 1591, 'epoch': 1} {'type': 'loss', 'content': 0.24480076134204865, 'timestamp': '2025-10-01 04:14:35.631793', 'step': 1592, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:35.667510', 'step': 1592, 'epoch': 1} {'type': 'loss', 'content': 0.15206296741962433, 'timestamp': '2025-10-01 04:14:35.669549', 'step': 1593, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:35.700910', 'step': 1593, 'epoch': 1} {'type': 'loss', 'content': 0.28534844517707825, 'timestamp': '2025-10-01 04:14:35.702714', 'step': 1594, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:14:35.734955', 'step': 1594, 'epoch': 1} {'type': 'loss', 'content': 0.1537816971540451, 'timestamp': '2025-10-01 04:14:35.736891', 'step': 1595, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:35.771724', 'step': 1595, 'epoch': 1} {'type': 'loss', 'content': 0.2253505289554596, 'timestamp': '2025-10-01 04:14:35.795347', 'step': 1596, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:35.830294', 'step': 1596, 'epoch': 1} {'type': 'loss', 'content': 0.2046084702014923, 'timestamp': '2025-10-01 04:14:35.832684', 'step': 1597, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:35.865504', 'step': 1597, 'epoch': 1} {'type': 'loss', 'content': 0.142426535487175, 'timestamp': '2025-10-01 04:14:35.867424', 'step': 1598, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:35.907564', 'step': 1598, 'epoch': 1} {'type': 'loss', 'content': 0.20022982358932495, 'timestamp': '2025-10-01 04:14:35.910942', 'step': 1599, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:35.948093', 'step': 1599, 'epoch': 1} {'type': 'loss', 'content': 0.19726905226707458, 'timestamp': '2025-10-01 04:14:35.971623', 'step': 1600, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:14:36.013227', 'step': 1600, 'epoch': 1} {'type': 'loss', 'content': 0.17252181470394135, 'timestamp': '2025-10-01 04:14:36.015203', 'step': 1601, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:14:36.055656', 'step': 1601, 'epoch': 1} {'type': 'loss', 'content': 0.14689771831035614, 'timestamp': '2025-10-01 04:14:36.057626', 'step': 1602, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:14:36.093417', 'step': 1602, 'epoch': 1} {'type': 'loss', 'content': 0.13972917199134827, 'timestamp': '2025-10-01 04:14:36.097723', 'step': 1603, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:14:36.140265', 'step': 1603, 'epoch': 1} {'type': 'loss', 'content': 0.18007415533065796, 'timestamp': '2025-10-01 04:14:36.163833', 'step': 1604, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:36.196141', 'step': 1604, 'epoch': 1} {'type': 'loss', 'content': 0.15143269300460815, 'timestamp': '2025-10-01 04:14:36.198292', 'step': 1605, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:36.231573', 'step': 1605, 'epoch': 1} {'type': 'loss', 'content': 0.21508806943893433, 'timestamp': '2025-10-01 04:14:36.233594', 'step': 1606, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:14:36.268693', 'step': 1606, 'epoch': 1} {'type': 'loss', 'content': 0.10841095447540283, 'timestamp': '2025-10-01 04:14:36.271059', 'step': 1607, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:14:36.309730', 'step': 1607, 'epoch': 1} {'type': 'loss', 'content': 0.19632793962955475, 'timestamp': '2025-10-01 04:14:36.333230', 'step': 1608, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:14:36.367802', 'step': 1608, 'epoch': 1} {'type': 'loss', 'content': 0.1898157000541687, 'timestamp': '2025-10-01 04:14:36.369551', 'step': 1609, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:36.410470', 'step': 1609, 'epoch': 1} {'type': 'loss', 'content': 0.2085195630788803, 'timestamp': '2025-10-01 04:14:36.412381', 'step': 1610, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:36.448128', 'step': 1610, 'epoch': 1} {'type': 'loss', 'content': 0.22296608984470367, 'timestamp': '2025-10-01 04:14:36.454438', 'step': 1611, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:14:36.492417', 'step': 1611, 'epoch': 1} {'type': 'loss', 'content': 0.22750605642795563, 'timestamp': '2025-10-01 04:14:36.515984', 'step': 1612, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:36.556349', 'step': 1612, 'epoch': 1} {'type': 'loss', 'content': 0.13854609429836273, 'timestamp': '2025-10-01 04:14:36.558326', 'step': 1613, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:14:36.594275', 'step': 1613, 'epoch': 1} {'type': 'loss', 'content': 0.14854107797145844, 'timestamp': '2025-10-01 04:14:36.596378', 'step': 1614, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:36.633684', 'step': 1614, 'epoch': 1} {'type': 'loss', 'content': 0.2847326993942261, 'timestamp': '2025-10-01 04:14:36.635760', 'step': 1615, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:36.668632', 'step': 1615, 'epoch': 1} {'type': 'loss', 'content': 0.14711199700832367, 'timestamp': '2025-10-01 04:14:36.692129', 'step': 1616, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:36.724802', 'step': 1616, 'epoch': 1} {'type': 'loss', 'content': 0.2139250934123993, 'timestamp': '2025-10-01 04:14:36.727037', 'step': 1617, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:36.762905', 'step': 1617, 'epoch': 1} {'type': 'loss', 'content': 0.19311553239822388, 'timestamp': '2025-10-01 04:14:36.766120', 'step': 1618, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:36.805506', 'step': 1618, 'epoch': 1} {'type': 'loss', 'content': 0.1887550801038742, 'timestamp': '2025-10-01 04:14:36.814627', 'step': 1619, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:14:36.852167', 'step': 1619, 'epoch': 1} {'type': 'loss', 'content': 0.13811157643795013, 'timestamp': '2025-10-01 04:14:36.875618', 'step': 1620, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:36.909926', 'step': 1620, 'epoch': 1} {'type': 'loss', 'content': 0.24503685534000397, 'timestamp': '2025-10-01 04:14:36.911734', 'step': 1621, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:36.948447', 'step': 1621, 'epoch': 1} {'type': 'loss', 'content': 0.20161934196949005, 'timestamp': '2025-10-01 04:14:36.950274', 'step': 1622, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:14:36.988246', 'step': 1622, 'epoch': 1} {'type': 'loss', 'content': 0.19522914290428162, 'timestamp': '2025-10-01 04:14:36.991015', 'step': 1623, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:37.025854', 'step': 1623, 'epoch': 1} {'type': 'loss', 'content': 0.3247939348220825, 'timestamp': '2025-10-01 04:14:37.049257', 'step': 1624, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:37.081072', 'step': 1624, 'epoch': 1} {'type': 'loss', 'content': 0.14584538340568542, 'timestamp': '2025-10-01 04:14:37.082921', 'step': 1625, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:37.116370', 'step': 1625, 'epoch': 1} {'type': 'loss', 'content': 0.08303463459014893, 'timestamp': '2025-10-01 04:14:37.118356', 'step': 1626, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:14:37.152442', 'step': 1626, 'epoch': 1} {'type': 'loss', 'content': 0.14815807342529297, 'timestamp': '2025-10-01 04:14:37.154377', 'step': 1627, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:37.187131', 'step': 1627, 'epoch': 1} {'type': 'loss', 'content': 0.14861823618412018, 'timestamp': '2025-10-01 04:14:37.210749', 'step': 1628, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:37.243140', 'step': 1628, 'epoch': 1} {'type': 'loss', 'content': 0.1326543092727661, 'timestamp': '2025-10-01 04:14:37.245151', 'step': 1629, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:14:37.286974', 'step': 1629, 'epoch': 1} {'type': 'loss', 'content': 0.14966519176959991, 'timestamp': '2025-10-01 04:14:37.289004', 'step': 1630, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:37.325892', 'step': 1630, 'epoch': 1} {'type': 'loss', 'content': 0.11087934672832489, 'timestamp': '2025-10-01 04:14:37.327828', 'step': 1631, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:14:37.362403', 'step': 1631, 'epoch': 1} {'type': 'loss', 'content': 0.17632141709327698, 'timestamp': '2025-10-01 04:14:37.385940', 'step': 1632, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:14:37.427083', 'step': 1632, 'epoch': 1} {'type': 'loss', 'content': 0.16071777045726776, 'timestamp': '2025-10-01 04:14:37.429086', 'step': 1633, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:37.462482', 'step': 1633, 'epoch': 1} {'type': 'loss', 'content': 0.1503540575504303, 'timestamp': '2025-10-01 04:14:37.464443', 'step': 1634, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:37.499261', 'step': 1634, 'epoch': 1} {'type': 'loss', 'content': 0.17986679077148438, 'timestamp': '2025-10-01 04:14:37.501180', 'step': 1635, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:37.540422', 'step': 1635, 'epoch': 1} {'type': 'loss', 'content': 0.1740955263376236, 'timestamp': '2025-10-01 04:14:37.563765', 'step': 1636, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:37.595428', 'step': 1636, 'epoch': 1} {'type': 'loss', 'content': 0.22696121037006378, 'timestamp': '2025-10-01 04:14:37.597285', 'step': 1637, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:37.630939', 'step': 1637, 'epoch': 1} {'type': 'loss', 'content': 0.3219762444496155, 'timestamp': '2025-10-01 04:14:37.632893', 'step': 1638, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:37.664120', 'step': 1638, 'epoch': 1} {'type': 'loss', 'content': 0.2636808753013611, 'timestamp': '2025-10-01 04:14:37.666047', 'step': 1639, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:14:37.699077', 'step': 1639, 'epoch': 1} {'type': 'loss', 'content': 0.2087305635213852, 'timestamp': '2025-10-01 04:14:37.722645', 'step': 1640, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:37.767660', 'step': 1640, 'epoch': 1} {'type': 'loss', 'content': 0.19840988516807556, 'timestamp': '2025-10-01 04:14:37.769468', 'step': 1641, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:14:37.812855', 'step': 1641, 'epoch': 1} {'type': 'loss', 'content': 0.1383320689201355, 'timestamp': '2025-10-01 04:14:37.814850', 'step': 1642, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:37.847790', 'step': 1642, 'epoch': 1} {'type': 'loss', 'content': 0.16268590092658997, 'timestamp': '2025-10-01 04:14:37.849856', 'step': 1643, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:14:37.885225', 'step': 1643, 'epoch': 1} {'type': 'loss', 'content': 0.2734498083591461, 'timestamp': '2025-10-01 04:14:37.908899', 'step': 1644, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:14:37.943712', 'step': 1644, 'epoch': 1} {'type': 'loss', 'content': 0.1135256290435791, 'timestamp': '2025-10-01 04:14:37.946631', 'step': 1645, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:37.981229', 'step': 1645, 'epoch': 1} {'type': 'loss', 'content': 0.12116547673940659, 'timestamp': '2025-10-01 04:14:37.983251', 'step': 1646, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:38.019166', 'step': 1646, 'epoch': 1} {'type': 'loss', 'content': 0.13035279512405396, 'timestamp': '2025-10-01 04:14:38.021090', 'step': 1647, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:14:38.055771', 'step': 1647, 'epoch': 1} {'type': 'loss', 'content': 0.1658748835325241, 'timestamp': '2025-10-01 04:14:38.079566', 'step': 1648, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:38.114776', 'step': 1648, 'epoch': 1} {'type': 'loss', 'content': 0.15971732139587402, 'timestamp': '2025-10-01 04:14:38.116752', 'step': 1649, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:38.155110', 'step': 1649, 'epoch': 1} {'type': 'loss', 'content': 0.18498866260051727, 'timestamp': '2025-10-01 04:14:38.157190', 'step': 1650, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:38.190422', 'step': 1650, 'epoch': 1} {'type': 'loss', 'content': 0.17725031077861786, 'timestamp': '2025-10-01 04:14:38.192395', 'step': 1651, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:38.228063', 'step': 1651, 'epoch': 1} {'type': 'loss', 'content': 0.1134205013513565, 'timestamp': '2025-10-01 04:14:38.251519', 'step': 1652, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:14:38.300628', 'step': 1652, 'epoch': 1} {'type': 'loss', 'content': 0.20635385811328888, 'timestamp': '2025-10-01 04:14:38.302580', 'step': 1653, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:38.338413', 'step': 1653, 'epoch': 1} {'type': 'loss', 'content': 0.12242358177900314, 'timestamp': '2025-10-01 04:14:38.340412', 'step': 1654, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:38.377442', 'step': 1654, 'epoch': 1} {'type': 'loss', 'content': 0.2070997804403305, 'timestamp': '2025-10-01 04:14:38.379399', 'step': 1655, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:14:38.415256', 'step': 1655, 'epoch': 1} {'type': 'loss', 'content': 0.3008708357810974, 'timestamp': '2025-10-01 04:14:38.438691', 'step': 1656, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:38.475101', 'step': 1656, 'epoch': 1} {'type': 'loss', 'content': 0.16888312995433807, 'timestamp': '2025-10-01 04:14:38.477049', 'step': 1657, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:14:38.516153', 'step': 1657, 'epoch': 1} {'type': 'loss', 'content': 0.11323776841163635, 'timestamp': '2025-10-01 04:14:38.518194', 'step': 1658, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:38.554502', 'step': 1658, 'epoch': 1} {'type': 'loss', 'content': 0.1627892702817917, 'timestamp': '2025-10-01 04:14:38.557043', 'step': 1659, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:38.592135', 'step': 1659, 'epoch': 1} {'type': 'loss', 'content': 0.14348618686199188, 'timestamp': '2025-10-01 04:14:38.615483', 'step': 1660, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:14:38.649521', 'step': 1660, 'epoch': 1} {'type': 'loss', 'content': 0.15251028537750244, 'timestamp': '2025-10-01 04:14:38.651501', 'step': 1661, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:14:38.685542', 'step': 1661, 'epoch': 1} {'type': 'loss', 'content': 0.21041250228881836, 'timestamp': '2025-10-01 04:14:38.687721', 'step': 1662, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:38.723735', 'step': 1662, 'epoch': 1} {'type': 'loss', 'content': 0.12582823634147644, 'timestamp': '2025-10-01 04:14:38.725869', 'step': 1663, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:14:38.762670', 'step': 1663, 'epoch': 1} {'type': 'loss', 'content': 0.12135051190853119, 'timestamp': '2025-10-01 04:14:38.786468', 'step': 1664, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:14:38.822554', 'step': 1664, 'epoch': 1} {'type': 'loss', 'content': 0.13327637314796448, 'timestamp': '2025-10-01 04:14:38.824491', 'step': 1665, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:38.858314', 'step': 1665, 'epoch': 1} {'type': 'loss', 'content': 0.153742253780365, 'timestamp': '2025-10-01 04:14:38.860214', 'step': 1666, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:38.895162', 'step': 1666, 'epoch': 1} {'type': 'loss', 'content': 0.18783481419086456, 'timestamp': '2025-10-01 04:14:38.897320', 'step': 1667, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:38.935308', 'step': 1667, 'epoch': 1} {'type': 'loss', 'content': 0.20117519795894623, 'timestamp': '2025-10-01 04:14:38.960016', 'step': 1668, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:14:38.993576', 'step': 1668, 'epoch': 1} {'type': 'loss', 'content': 0.12089205533266068, 'timestamp': '2025-10-01 04:14:38.995526', 'step': 1669, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:14:39.028732', 'step': 1669, 'epoch': 1} {'type': 'loss', 'content': 0.20638421177864075, 'timestamp': '2025-10-01 04:14:39.030652', 'step': 1670, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:39.063550', 'step': 1670, 'epoch': 1} {'type': 'loss', 'content': 0.12759728729724884, 'timestamp': '2025-10-01 04:14:39.065657', 'step': 1671, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:14:39.109941', 'step': 1671, 'epoch': 1} {'type': 'loss', 'content': 0.2004588097333908, 'timestamp': '2025-10-01 04:14:39.133467', 'step': 1672, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:14:39.169263', 'step': 1672, 'epoch': 1} {'type': 'loss', 'content': 0.18990157544612885, 'timestamp': '2025-10-01 04:14:39.171925', 'step': 1673, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:14:39.208520', 'step': 1673, 'epoch': 1} {'type': 'loss', 'content': 0.16189153492450714, 'timestamp': '2025-10-01 04:14:39.210572', 'step': 1674, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:39.246455', 'step': 1674, 'epoch': 1} {'type': 'loss', 'content': 0.21065762639045715, 'timestamp': '2025-10-01 04:14:39.248433', 'step': 1675, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:39.280953', 'step': 1675, 'epoch': 1} {'type': 'loss', 'content': 0.1797313541173935, 'timestamp': '2025-10-01 04:14:39.305543', 'step': 1676, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:39.339662', 'step': 1676, 'epoch': 1} {'type': 'loss', 'content': 0.13962212204933167, 'timestamp': '2025-10-01 04:14:39.344283', 'step': 1677, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:39.384746', 'step': 1677, 'epoch': 1} {'type': 'loss', 'content': 0.15252424776554108, 'timestamp': '2025-10-01 04:14:39.386623', 'step': 1678, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-10-01 04:14:39.431270', 'step': 1678, 'epoch': 1} {'type': 'loss', 'content': 0.1554947942495346, 'timestamp': '2025-10-01 04:14:39.438631', 'step': 1679, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:39.471406', 'step': 1679, 'epoch': 1} {'type': 'loss', 'content': 0.17955346405506134, 'timestamp': '2025-10-01 04:14:39.494887', 'step': 1680, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:14:39.529617', 'step': 1680, 'epoch': 1} {'type': 'loss', 'content': 0.11012905836105347, 'timestamp': '2025-10-01 04:14:39.531420', 'step': 1681, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:14:39.565816', 'step': 1681, 'epoch': 1} {'type': 'loss', 'content': 0.1230478286743164, 'timestamp': '2025-10-01 04:14:39.567843', 'step': 1682, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:14:39.605173', 'step': 1682, 'epoch': 1} {'type': 'loss', 'content': 0.1643153876066208, 'timestamp': '2025-10-01 04:14:39.607254', 'step': 1683, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:39.643560', 'step': 1683, 'epoch': 1} {'type': 'loss', 'content': 0.20965324342250824, 'timestamp': '2025-10-01 04:14:39.674128', 'step': 1684, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:39.706244', 'step': 1684, 'epoch': 1} {'type': 'loss', 'content': 0.32648900151252747, 'timestamp': '2025-10-01 04:14:39.708155', 'step': 1685, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:39.744706', 'step': 1685, 'epoch': 1} {'type': 'loss', 'content': 0.1785462200641632, 'timestamp': '2025-10-01 04:14:39.746641', 'step': 1686, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:39.782997', 'step': 1686, 'epoch': 1} {'type': 'loss', 'content': 0.2658342719078064, 'timestamp': '2025-10-01 04:14:39.785075', 'step': 1687, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:39.850852', 'step': 1687, 'epoch': 1} {'type': 'loss', 'content': 0.11229903995990753, 'timestamp': '2025-10-01 04:14:39.874961', 'step': 1688, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:39.918592', 'step': 1688, 'epoch': 1} {'type': 'loss', 'content': 0.13613031804561615, 'timestamp': '2025-10-01 04:14:39.920617', 'step': 1689, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:14:39.954653', 'step': 1689, 'epoch': 1} {'type': 'loss', 'content': 0.2555149495601654, 'timestamp': '2025-10-01 04:14:39.956640', 'step': 1690, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:14:39.990945', 'step': 1690, 'epoch': 1} {'type': 'loss', 'content': 0.31091248989105225, 'timestamp': '2025-10-01 04:14:39.992890', 'step': 1691, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:40.024149', 'step': 1691, 'epoch': 1} {'type': 'loss', 'content': 0.14287669956684113, 'timestamp': '2025-10-01 04:14:40.047710', 'step': 1692, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:14:40.083465', 'step': 1692, 'epoch': 1} {'type': 'loss', 'content': 0.2463628202676773, 'timestamp': '2025-10-01 04:14:40.085374', 'step': 1693, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:14:40.120982', 'step': 1693, 'epoch': 1} {'type': 'loss', 'content': 0.14297422766685486, 'timestamp': '2025-10-01 04:14:40.123795', 'step': 1694, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:14:40.158926', 'step': 1694, 'epoch': 1} {'type': 'loss', 'content': 0.1359238177537918, 'timestamp': '2025-10-01 04:14:40.161812', 'step': 1695, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:14:40.198941', 'step': 1695, 'epoch': 1} {'type': 'loss', 'content': 0.13042885065078735, 'timestamp': '2025-10-01 04:14:40.222281', 'step': 1696, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:14:40.255672', 'step': 1696, 'epoch': 1} {'type': 'loss', 'content': 0.2313426434993744, 'timestamp': '2025-10-01 04:14:40.257624', 'step': 1697, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:40.290807', 'step': 1697, 'epoch': 1} {'type': 'loss', 'content': 0.17074497044086456, 'timestamp': '2025-10-01 04:14:40.292877', 'step': 1698, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:14:40.339248', 'step': 1698, 'epoch': 1} {'type': 'loss', 'content': 0.15890079736709595, 'timestamp': '2025-10-01 04:14:40.341438', 'step': 1699, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:14:40.373405', 'step': 1699, 'epoch': 1} {'type': 'loss', 'content': 0.20704413950443268, 'timestamp': '2025-10-01 04:14:40.396939', 'step': 1700, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:14:40.430599', 'step': 1700, 'epoch': 1} {'type': 'loss', 'content': 0.0989147275686264, 'timestamp': '2025-10-01 04:14:40.432634', 'step': 1701, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:14:40.475225', 'step': 1701, 'epoch': 1} {'type': 'loss', 'content': 0.1650066077709198, 'timestamp': '2025-10-01 04:14:40.477372', 'step': 1702, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:40.523462', 'step': 1702, 'epoch': 1} {'type': 'loss', 'content': 0.14634624123573303, 'timestamp': '2025-10-01 04:14:40.525347', 'step': 1703, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:40.557925', 'step': 1703, 'epoch': 1} {'type': 'loss', 'content': 0.20502658188343048, 'timestamp': '2025-10-01 04:14:40.581364', 'step': 1704, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:40.640524', 'step': 1704, 'epoch': 1} {'type': 'loss', 'content': 0.23856833577156067, 'timestamp': '2025-10-01 04:14:40.642561', 'step': 1705, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:40.677529', 'step': 1705, 'epoch': 1} {'type': 'loss', 'content': 0.153899148106575, 'timestamp': '2025-10-01 04:14:40.679378', 'step': 1706, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:14:40.723942', 'step': 1706, 'epoch': 1} {'type': 'loss', 'content': 0.22081786394119263, 'timestamp': '2025-10-01 04:14:40.726740', 'step': 1707, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:14:40.772779', 'step': 1707, 'epoch': 1} {'type': 'loss', 'content': 0.14191371202468872, 'timestamp': '2025-10-01 04:14:40.796119', 'step': 1708, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:14:40.830737', 'step': 1708, 'epoch': 1} {'type': 'loss', 'content': 0.1359349936246872, 'timestamp': '2025-10-01 04:14:40.832939', 'step': 1709, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:14:40.879703', 'step': 1709, 'epoch': 1} {'type': 'loss', 'content': 0.2072238326072693, 'timestamp': '2025-10-01 04:14:40.881911', 'step': 1710, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:14:40.920155', 'step': 1710, 'epoch': 1} {'type': 'loss', 'content': 0.13071325421333313, 'timestamp': '2025-10-01 04:14:40.924202', 'step': 1711, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:40.959168', 'step': 1711, 'epoch': 1} {'type': 'loss', 'content': 0.19974739849567413, 'timestamp': '2025-10-01 04:14:40.983173', 'step': 1712, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:41.018317', 'step': 1712, 'epoch': 1} {'type': 'loss', 'content': 0.13118775188922882, 'timestamp': '2025-10-01 04:14:41.020631', 'step': 1713, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:41.054701', 'step': 1713, 'epoch': 1} {'type': 'loss', 'content': 0.2548954486846924, 'timestamp': '2025-10-01 04:14:41.056892', 'step': 1714, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:14:41.092307', 'step': 1714, 'epoch': 1} {'type': 'loss', 'content': 0.31301069259643555, 'timestamp': '2025-10-01 04:14:41.095570', 'step': 1715, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:41.132229', 'step': 1715, 'epoch': 1} {'type': 'loss', 'content': 0.1549207866191864, 'timestamp': '2025-10-01 04:14:41.156600', 'step': 1716, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:41.191999', 'step': 1716, 'epoch': 1} {'type': 'loss', 'content': 0.13376052677631378, 'timestamp': '2025-10-01 04:14:41.194636', 'step': 1717, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:41.239372', 'step': 1717, 'epoch': 1} {'type': 'loss', 'content': 0.15632931888103485, 'timestamp': '2025-10-01 04:14:41.241684', 'step': 1718, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:14:41.289174', 'step': 1718, 'epoch': 1} {'type': 'loss', 'content': 0.3221282362937927, 'timestamp': '2025-10-01 04:14:41.291425', 'step': 1719, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:41.329617', 'step': 1719, 'epoch': 1} {'type': 'loss', 'content': 0.13220272958278656, 'timestamp': '2025-10-01 04:14:41.353388', 'step': 1720, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:14:41.388107', 'step': 1720, 'epoch': 1} {'type': 'loss', 'content': 0.17615224421024323, 'timestamp': '2025-10-01 04:14:41.391124', 'step': 1721, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:41.430700', 'step': 1721, 'epoch': 1} {'type': 'loss', 'content': 0.1539251208305359, 'timestamp': '2025-10-01 04:14:41.433102', 'step': 1722, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:14:41.474441', 'step': 1722, 'epoch': 1} {'type': 'loss', 'content': 0.19100520014762878, 'timestamp': '2025-10-01 04:14:41.476966', 'step': 1723, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:14:41.518992', 'step': 1723, 'epoch': 1} {'type': 'loss', 'content': 0.17048223316669464, 'timestamp': '2025-10-01 04:14:41.542697', 'step': 1724, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:14:41.579107', 'step': 1724, 'epoch': 1} {'type': 'loss', 'content': 0.12313132733106613, 'timestamp': '2025-10-01 04:14:41.581329', 'step': 1725, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:41.614331', 'step': 1725, 'epoch': 1} {'type': 'loss', 'content': 0.1536489874124527, 'timestamp': '2025-10-01 04:14:41.616737', 'step': 1726, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:14:41.652017', 'step': 1726, 'epoch': 1} {'type': 'loss', 'content': 0.17135736346244812, 'timestamp': '2025-10-01 04:14:41.654275', 'step': 1727, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:14:41.687988', 'step': 1727, 'epoch': 1} {'type': 'loss', 'content': 0.10436439514160156, 'timestamp': '2025-10-01 04:14:41.711425', 'step': 1728, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:14:41.746390', 'step': 1728, 'epoch': 1} {'type': 'loss', 'content': 0.2450638860464096, 'timestamp': '2025-10-01 04:14:41.754356', 'step': 1729, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:41.811921', 'step': 1729, 'epoch': 1} {'type': 'loss', 'content': 0.18335822224617004, 'timestamp': '2025-10-01 04:14:41.814528', 'step': 1730, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:41.850288', 'step': 1730, 'epoch': 1} {'type': 'loss', 'content': 0.08982891589403152, 'timestamp': '2025-10-01 04:14:41.852684', 'step': 1731, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:41.888113', 'step': 1731, 'epoch': 1} {'type': 'loss', 'content': 0.13292625546455383, 'timestamp': '2025-10-01 04:14:41.912373', 'step': 1732, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:14:41.945017', 'step': 1732, 'epoch': 1} {'type': 'loss', 'content': 0.26894235610961914, 'timestamp': '2025-10-01 04:14:41.947238', 'step': 1733, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:41.982445', 'step': 1733, 'epoch': 1} {'type': 'loss', 'content': 0.20841734111309052, 'timestamp': '2025-10-01 04:14:41.984479', 'step': 1734, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:42.021108', 'step': 1734, 'epoch': 1} {'type': 'loss', 'content': 0.19087406992912292, 'timestamp': '2025-10-01 04:14:42.023388', 'step': 1735, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:42.056966', 'step': 1735, 'epoch': 1} {'type': 'loss', 'content': 0.17876118421554565, 'timestamp': '2025-10-01 04:14:42.080850', 'step': 1736, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:14:42.116958', 'step': 1736, 'epoch': 1} {'type': 'loss', 'content': 0.12684357166290283, 'timestamp': '2025-10-01 04:14:42.120537', 'step': 1737, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:14:42.156306', 'step': 1737, 'epoch': 1} {'type': 'loss', 'content': 0.21314342319965363, 'timestamp': '2025-10-01 04:14:42.160315', 'step': 1738, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:42.195153', 'step': 1738, 'epoch': 1} {'type': 'loss', 'content': 0.19633828103542328, 'timestamp': '2025-10-01 04:14:42.198338', 'step': 1739, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:42.231042', 'step': 1739, 'epoch': 1} {'type': 'loss', 'content': 0.22803661227226257, 'timestamp': '2025-10-01 04:14:42.254772', 'step': 1740, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:42.287972', 'step': 1740, 'epoch': 1} {'type': 'loss', 'content': 0.16881327331066132, 'timestamp': '2025-10-01 04:14:42.290352', 'step': 1741, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:14:42.325203', 'step': 1741, 'epoch': 1} {'type': 'loss', 'content': 0.17769567668437958, 'timestamp': '2025-10-01 04:14:42.329056', 'step': 1742, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:42.371747', 'step': 1742, 'epoch': 1} {'type': 'loss', 'content': 0.175215482711792, 'timestamp': '2025-10-01 04:14:42.373784', 'step': 1743, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:42.408625', 'step': 1743, 'epoch': 1} {'type': 'loss', 'content': 0.1763748824596405, 'timestamp': '2025-10-01 04:14:42.432480', 'step': 1744, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:14:42.468515', 'step': 1744, 'epoch': 1} {'type': 'loss', 'content': 0.18149104714393616, 'timestamp': '2025-10-01 04:14:42.471170', 'step': 1745, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:14:42.513295', 'step': 1745, 'epoch': 1} {'type': 'loss', 'content': 0.1427052617073059, 'timestamp': '2025-10-01 04:14:42.515846', 'step': 1746, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:42.548906', 'step': 1746, 'epoch': 1} {'type': 'loss', 'content': 0.1513734608888626, 'timestamp': '2025-10-01 04:14:42.552261', 'step': 1747, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:42.586851', 'step': 1747, 'epoch': 1} {'type': 'loss', 'content': 0.1736774444580078, 'timestamp': '2025-10-01 04:14:42.610362', 'step': 1748, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:42.662796', 'step': 1748, 'epoch': 1} {'type': 'loss', 'content': 0.17665398120880127, 'timestamp': '2025-10-01 04:14:42.665114', 'step': 1749, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:42.713569', 'step': 1749, 'epoch': 1} {'type': 'loss', 'content': 0.14895281195640564, 'timestamp': '2025-10-01 04:14:42.715535', 'step': 1750, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:42.748414', 'step': 1750, 'epoch': 1} {'type': 'loss', 'content': 0.15142659842967987, 'timestamp': '2025-10-01 04:14:42.751962', 'step': 1751, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:42.786933', 'step': 1751, 'epoch': 1} {'type': 'loss', 'content': 0.26777833700180054, 'timestamp': '2025-10-01 04:14:42.810587', 'step': 1752, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:42.845818', 'step': 1752, 'epoch': 1} {'type': 'loss', 'content': 0.18486803770065308, 'timestamp': '2025-10-01 04:14:42.847964', 'step': 1753, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:42.886050', 'step': 1753, 'epoch': 1} {'type': 'loss', 'content': 0.22634097933769226, 'timestamp': '2025-10-01 04:14:42.888111', 'step': 1754, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:42.927008', 'step': 1754, 'epoch': 1} {'type': 'loss', 'content': 0.09736356884241104, 'timestamp': '2025-10-01 04:14:42.929113', 'step': 1755, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:42.976299', 'step': 1755, 'epoch': 1} {'type': 'loss', 'content': 0.1030968502163887, 'timestamp': '2025-10-01 04:14:43.000103', 'step': 1756, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:14:43.038141', 'step': 1756, 'epoch': 1} {'type': 'loss', 'content': 0.13390006124973297, 'timestamp': '2025-10-01 04:14:43.040164', 'step': 1757, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:43.077890', 'step': 1757, 'epoch': 1} {'type': 'loss', 'content': 0.22692935168743134, 'timestamp': '2025-10-01 04:14:43.079924', 'step': 1758, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:43.113685', 'step': 1758, 'epoch': 1} {'type': 'loss', 'content': 0.13420937955379486, 'timestamp': '2025-10-01 04:14:43.115880', 'step': 1759, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:14:43.149853', 'step': 1759, 'epoch': 1} {'type': 'loss', 'content': 0.12848515808582306, 'timestamp': '2025-10-01 04:14:43.173640', 'step': 1760, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:43.207664', 'step': 1760, 'epoch': 1} {'type': 'loss', 'content': 0.125733882188797, 'timestamp': '2025-10-01 04:14:43.210111', 'step': 1761, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:43.250788', 'step': 1761, 'epoch': 1} {'type': 'loss', 'content': 0.18263933062553406, 'timestamp': '2025-10-01 04:14:43.253100', 'step': 1762, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:43.286612', 'step': 1762, 'epoch': 1} {'type': 'loss', 'content': 0.17529256641864777, 'timestamp': '2025-10-01 04:14:43.288645', 'step': 1763, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:43.321141', 'step': 1763, 'epoch': 1} {'type': 'loss', 'content': 0.2241513580083847, 'timestamp': '2025-10-01 04:14:43.344720', 'step': 1764, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:14:43.385582', 'step': 1764, 'epoch': 1} {'type': 'loss', 'content': 0.14134833216667175, 'timestamp': '2025-10-01 04:14:43.388134', 'step': 1765, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:14:43.430987', 'step': 1765, 'epoch': 1} {'type': 'loss', 'content': 0.14053624868392944, 'timestamp': '2025-10-01 04:14:43.435550', 'step': 1766, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:43.470186', 'step': 1766, 'epoch': 1} {'type': 'loss', 'content': 0.12246973812580109, 'timestamp': '2025-10-01 04:14:43.472257', 'step': 1767, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:14:43.510960', 'step': 1767, 'epoch': 1} {'type': 'loss', 'content': 0.1634180098772049, 'timestamp': '2025-10-01 04:14:43.535132', 'step': 1768, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:43.569630', 'step': 1768, 'epoch': 1} {'type': 'loss', 'content': 0.13820798695087433, 'timestamp': '2025-10-01 04:14:43.571982', 'step': 1769, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:43.611389', 'step': 1769, 'epoch': 1} {'type': 'loss', 'content': 0.19732722640037537, 'timestamp': '2025-10-01 04:14:43.613878', 'step': 1770, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:14:43.648896', 'step': 1770, 'epoch': 1} {'type': 'loss', 'content': 0.1499352753162384, 'timestamp': '2025-10-01 04:14:43.650950', 'step': 1771, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:43.696096', 'step': 1771, 'epoch': 1} {'type': 'loss', 'content': 0.16519159078598022, 'timestamp': '2025-10-01 04:14:43.719941', 'step': 1772, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:43.754276', 'step': 1772, 'epoch': 1} {'type': 'loss', 'content': 0.25405094027519226, 'timestamp': '2025-10-01 04:14:43.756252', 'step': 1773, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:14:43.803136', 'step': 1773, 'epoch': 1} {'type': 'loss', 'content': 0.2368285059928894, 'timestamp': '2025-10-01 04:14:43.805284', 'step': 1774, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:14:43.839189', 'step': 1774, 'epoch': 1} {'type': 'loss', 'content': 0.22421593964099884, 'timestamp': '2025-10-01 04:14:43.841422', 'step': 1775, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:43.880086', 'step': 1775, 'epoch': 1} {'type': 'loss', 'content': 0.15994805097579956, 'timestamp': '2025-10-01 04:14:43.903539', 'step': 1776, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:43.946794', 'step': 1776, 'epoch': 1} {'type': 'loss', 'content': 0.25393643975257874, 'timestamp': '2025-10-01 04:14:43.948881', 'step': 1777, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:43.991995', 'step': 1777, 'epoch': 1} {'type': 'loss', 'content': 0.1881016492843628, 'timestamp': '2025-10-01 04:14:43.993948', 'step': 1778, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:44.028418', 'step': 1778, 'epoch': 1} {'type': 'loss', 'content': 0.20228637754917145, 'timestamp': '2025-10-01 04:14:44.032186', 'step': 1779, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:14:44.066918', 'step': 1779, 'epoch': 1} {'type': 'loss', 'content': 0.20548143982887268, 'timestamp': '2025-10-01 04:14:44.090615', 'step': 1780, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:44.126982', 'step': 1780, 'epoch': 1} {'type': 'loss', 'content': 0.2868594527244568, 'timestamp': '2025-10-01 04:14:44.128973', 'step': 1781, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:44.162922', 'step': 1781, 'epoch': 1} {'type': 'loss', 'content': 0.1663542091846466, 'timestamp': '2025-10-01 04:14:44.165244', 'step': 1782, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:44.205249', 'step': 1782, 'epoch': 1} {'type': 'loss', 'content': 0.17086485028266907, 'timestamp': '2025-10-01 04:14:44.207253', 'step': 1783, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:44.257091', 'step': 1783, 'epoch': 1} {'type': 'loss', 'content': 0.23205164074897766, 'timestamp': '2025-10-01 04:14:44.280779', 'step': 1784, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:14:44.318249', 'step': 1784, 'epoch': 1} {'type': 'loss', 'content': 0.14821793138980865, 'timestamp': '2025-10-01 04:14:44.320309', 'step': 1785, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:44.367806', 'step': 1785, 'epoch': 1} {'type': 'loss', 'content': 0.07479605078697205, 'timestamp': '2025-10-01 04:14:44.369944', 'step': 1786, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:44.406982', 'step': 1786, 'epoch': 1} {'type': 'loss', 'content': 0.18760302662849426, 'timestamp': '2025-10-01 04:14:44.409129', 'step': 1787, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:14:44.442919', 'step': 1787, 'epoch': 1} {'type': 'loss', 'content': 0.11142093688249588, 'timestamp': '2025-10-01 04:14:44.466553', 'step': 1788, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:44.507017', 'step': 1788, 'epoch': 1} {'type': 'loss', 'content': 0.22683759033679962, 'timestamp': '2025-10-01 04:14:44.509207', 'step': 1789, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:14:44.548253', 'step': 1789, 'epoch': 1} {'type': 'loss', 'content': 0.2221551090478897, 'timestamp': '2025-10-01 04:14:44.550463', 'step': 1790, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:14:44.606416', 'step': 1790, 'epoch': 1} {'type': 'loss', 'content': 0.14172157645225525, 'timestamp': '2025-10-01 04:14:44.608519', 'step': 1791, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:44.648055', 'step': 1791, 'epoch': 1} {'type': 'loss', 'content': 0.19735099375247955, 'timestamp': '2025-10-01 04:14:44.671674', 'step': 1792, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:44.713515', 'step': 1792, 'epoch': 1} {'type': 'loss', 'content': 0.14723128080368042, 'timestamp': '2025-10-01 04:14:44.715647', 'step': 1793, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:44.748136', 'step': 1793, 'epoch': 1} {'type': 'loss', 'content': 0.20603607594966888, 'timestamp': '2025-10-01 04:14:44.750274', 'step': 1794, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:44.791226', 'step': 1794, 'epoch': 1} {'type': 'loss', 'content': 0.1095045804977417, 'timestamp': '2025-10-01 04:14:44.793358', 'step': 1795, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:44.832575', 'step': 1795, 'epoch': 1} {'type': 'loss', 'content': 0.2119438648223877, 'timestamp': '2025-10-01 04:14:44.856442', 'step': 1796, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:44.890362', 'step': 1796, 'epoch': 1} {'type': 'loss', 'content': 0.25721457600593567, 'timestamp': '2025-10-01 04:14:44.892509', 'step': 1797, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:14:44.936345', 'step': 1797, 'epoch': 1} {'type': 'loss', 'content': 0.2663426697254181, 'timestamp': '2025-10-01 04:14:44.938447', 'step': 1798, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:44.972903', 'step': 1798, 'epoch': 1} {'type': 'loss', 'content': 0.2135595828294754, 'timestamp': '2025-10-01 04:14:44.976085', 'step': 1799, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:14:45.018424', 'step': 1799, 'epoch': 1} {'type': 'loss', 'content': 0.16930119693279266, 'timestamp': '2025-10-01 04:14:45.041995', 'step': 1800, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:45.078309', 'step': 1800, 'epoch': 1} {'type': 'loss', 'content': 0.17088809609413147, 'timestamp': '2025-10-01 04:14:45.080761', 'step': 1801, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:14:45.114964', 'step': 1801, 'epoch': 1} {'type': 'loss', 'content': 0.0920373797416687, 'timestamp': '2025-10-01 04:14:45.116980', 'step': 1802, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:45.151035', 'step': 1802, 'epoch': 1} {'type': 'loss', 'content': 0.1352923959493637, 'timestamp': '2025-10-01 04:14:45.153262', 'step': 1803, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:45.195681', 'step': 1803, 'epoch': 1} {'type': 'loss', 'content': 0.19907011091709137, 'timestamp': '2025-10-01 04:14:45.219468', 'step': 1804, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:14:45.252663', 'step': 1804, 'epoch': 1} {'type': 'loss', 'content': 0.1955205500125885, 'timestamp': '2025-10-01 04:14:45.254794', 'step': 1805, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:14:45.290847', 'step': 1805, 'epoch': 1} {'type': 'loss', 'content': 0.21635782718658447, 'timestamp': '2025-10-01 04:14:45.293060', 'step': 1806, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:45.326804', 'step': 1806, 'epoch': 1} {'type': 'loss', 'content': 0.1572447419166565, 'timestamp': '2025-10-01 04:14:45.328903', 'step': 1807, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:14:45.362243', 'step': 1807, 'epoch': 1} {'type': 'loss', 'content': 0.20105503499507904, 'timestamp': '2025-10-01 04:14:45.385870', 'step': 1808, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:45.420194', 'step': 1808, 'epoch': 1} {'type': 'loss', 'content': 0.23987121880054474, 'timestamp': '2025-10-01 04:14:45.422209', 'step': 1809, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-10-01 04:14:45.464594', 'step': 1809, 'epoch': 1} {'type': 'loss', 'content': 0.1822296380996704, 'timestamp': '2025-10-01 04:14:45.468020', 'step': 1810, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:14:45.503743', 'step': 1810, 'epoch': 1} {'type': 'loss', 'content': 0.15315183997154236, 'timestamp': '2025-10-01 04:14:45.511241', 'step': 1811, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:45.551541', 'step': 1811, 'epoch': 1} {'type': 'loss', 'content': 0.21478934586048126, 'timestamp': '2025-10-01 04:14:45.575308', 'step': 1812, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:14:45.624814', 'step': 1812, 'epoch': 1} {'type': 'loss', 'content': 0.15159882605075836, 'timestamp': '2025-10-01 04:14:45.629535', 'step': 1813, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-10-01 04:14:45.667024', 'step': 1813, 'epoch': 1} {'type': 'loss', 'content': 0.13657698035240173, 'timestamp': '2025-10-01 04:14:45.671226', 'step': 1814, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:45.721168', 'step': 1814, 'epoch': 1} {'type': 'loss', 'content': 0.16808779537677765, 'timestamp': '2025-10-01 04:14:45.725310', 'step': 1815, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:45.770584', 'step': 1815, 'epoch': 1} {'type': 'loss', 'content': 0.22997815907001495, 'timestamp': '2025-10-01 04:14:45.794275', 'step': 1816, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:45.828143', 'step': 1816, 'epoch': 1} {'type': 'loss', 'content': 0.15293169021606445, 'timestamp': '2025-10-01 04:14:45.830439', 'step': 1817, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:45.863306', 'step': 1817, 'epoch': 1} {'type': 'loss', 'content': 0.09138810634613037, 'timestamp': '2025-10-01 04:14:45.865604', 'step': 1818, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:45.905222', 'step': 1818, 'epoch': 1} {'type': 'loss', 'content': 0.1576787382364273, 'timestamp': '2025-10-01 04:14:45.909279', 'step': 1819, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:45.943595', 'step': 1819, 'epoch': 1} {'type': 'loss', 'content': 0.20693838596343994, 'timestamp': '2025-10-01 04:14:45.967376', 'step': 1820, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:46.001183', 'step': 1820, 'epoch': 1} {'type': 'loss', 'content': 0.13579918444156647, 'timestamp': '2025-10-01 04:14:46.003473', 'step': 1821, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:46.037438', 'step': 1821, 'epoch': 1} {'type': 'loss', 'content': 0.14853861927986145, 'timestamp': '2025-10-01 04:14:46.039535', 'step': 1822, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:14:46.077958', 'step': 1822, 'epoch': 1} {'type': 'loss', 'content': 0.14038291573524475, 'timestamp': '2025-10-01 04:14:46.080063', 'step': 1823, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:14:46.118766', 'step': 1823, 'epoch': 1} {'type': 'loss', 'content': 0.1402047872543335, 'timestamp': '2025-10-01 04:14:46.142341', 'step': 1824, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:14:46.181680', 'step': 1824, 'epoch': 1} {'type': 'loss', 'content': 0.21991118788719177, 'timestamp': '2025-10-01 04:14:46.183750', 'step': 1825, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:46.218878', 'step': 1825, 'epoch': 1} {'type': 'loss', 'content': 0.15801602602005005, 'timestamp': '2025-10-01 04:14:46.222255', 'step': 1826, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:46.257122', 'step': 1826, 'epoch': 1} {'type': 'loss', 'content': 0.19141733646392822, 'timestamp': '2025-10-01 04:14:46.259107', 'step': 1827, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:46.297019', 'step': 1827, 'epoch': 1} {'type': 'loss', 'content': 0.0993652418255806, 'timestamp': '2025-10-01 04:14:46.320555', 'step': 1828, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:46.352694', 'step': 1828, 'epoch': 1} {'type': 'loss', 'content': 0.16025014221668243, 'timestamp': '2025-10-01 04:14:46.354936', 'step': 1829, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:46.397512', 'step': 1829, 'epoch': 1} {'type': 'loss', 'content': 0.06787323206663132, 'timestamp': '2025-10-01 04:14:46.399579', 'step': 1830, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:46.440612', 'step': 1830, 'epoch': 1} {'type': 'loss', 'content': 0.1877278983592987, 'timestamp': '2025-10-01 04:14:46.445286', 'step': 1831, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:46.487901', 'step': 1831, 'epoch': 1} {'type': 'loss', 'content': 0.1677202582359314, 'timestamp': '2025-10-01 04:14:46.512585', 'step': 1832, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:14:46.551946', 'step': 1832, 'epoch': 1} {'type': 'loss', 'content': 0.2119063436985016, 'timestamp': '2025-10-01 04:14:46.554197', 'step': 1833, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:46.589804', 'step': 1833, 'epoch': 1} {'type': 'loss', 'content': 0.2267729789018631, 'timestamp': '2025-10-01 04:14:46.591957', 'step': 1834, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:46.632711', 'step': 1834, 'epoch': 1} {'type': 'loss', 'content': 0.0951177179813385, 'timestamp': '2025-10-01 04:14:46.634769', 'step': 1835, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:14:46.677204', 'step': 1835, 'epoch': 1} {'type': 'loss', 'content': 0.2056690901517868, 'timestamp': '2025-10-01 04:14:46.700651', 'step': 1836, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:46.737584', 'step': 1836, 'epoch': 1} {'type': 'loss', 'content': 0.11298127472400665, 'timestamp': '2025-10-01 04:14:46.739647', 'step': 1837, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:14:46.774894', 'step': 1837, 'epoch': 1} {'type': 'loss', 'content': 0.222152441740036, 'timestamp': '2025-10-01 04:14:46.778047', 'step': 1838, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:14:46.821819', 'step': 1838, 'epoch': 1} {'type': 'loss', 'content': 0.17500239610671997, 'timestamp': '2025-10-01 04:14:46.823998', 'step': 1839, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:14:46.858445', 'step': 1839, 'epoch': 1} {'type': 'loss', 'content': 0.23048152029514313, 'timestamp': '2025-10-01 04:14:46.882118', 'step': 1840, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:46.917001', 'step': 1840, 'epoch': 1} {'type': 'loss', 'content': 0.08961743116378784, 'timestamp': '2025-10-01 04:14:46.920157', 'step': 1841, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:14:46.963337', 'step': 1841, 'epoch': 1} {'type': 'loss', 'content': 0.14892271161079407, 'timestamp': '2025-10-01 04:14:46.965368', 'step': 1842, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:47.002406', 'step': 1842, 'epoch': 1} {'type': 'loss', 'content': 0.15912191569805145, 'timestamp': '2025-10-01 04:14:47.004564', 'step': 1843, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:47.042380', 'step': 1843, 'epoch': 1} {'type': 'loss', 'content': 0.16604113578796387, 'timestamp': '2025-10-01 04:14:47.066050', 'step': 1844, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:14:47.102004', 'step': 1844, 'epoch': 1} {'type': 'loss', 'content': 0.1474376767873764, 'timestamp': '2025-10-01 04:14:47.104368', 'step': 1845, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:47.139811', 'step': 1845, 'epoch': 1} {'type': 'loss', 'content': 0.20533864200115204, 'timestamp': '2025-10-01 04:14:47.142161', 'step': 1846, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:14:47.184102', 'step': 1846, 'epoch': 1} {'type': 'loss', 'content': 0.14166541397571564, 'timestamp': '2025-10-01 04:14:47.186816', 'step': 1847, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:14:47.223923', 'step': 1847, 'epoch': 1} {'type': 'loss', 'content': 0.1676284819841385, 'timestamp': '2025-10-01 04:14:47.247878', 'step': 1848, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:14:47.280052', 'step': 1848, 'epoch': 1} {'type': 'loss', 'content': 0.20035849511623383, 'timestamp': '2025-10-01 04:14:47.282246', 'step': 1849, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:47.324972', 'step': 1849, 'epoch': 1} {'type': 'loss', 'content': 0.14935876429080963, 'timestamp': '2025-10-01 04:14:47.330291', 'step': 1850, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:47.365974', 'step': 1850, 'epoch': 1} {'type': 'loss', 'content': 0.16610433161258698, 'timestamp': '2025-10-01 04:14:47.368046', 'step': 1851, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:14:47.402735', 'step': 1851, 'epoch': 1} {'type': 'loss', 'content': 0.1936432123184204, 'timestamp': '2025-10-01 04:14:47.426031', 'step': 1852, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:47.463251', 'step': 1852, 'epoch': 1} {'type': 'loss', 'content': 0.22524797916412354, 'timestamp': '2025-10-01 04:14:47.465227', 'step': 1853, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:14:47.498888', 'step': 1853, 'epoch': 1} {'type': 'loss', 'content': 0.12041407078504562, 'timestamp': '2025-10-01 04:14:47.500921', 'step': 1854, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:47.534922', 'step': 1854, 'epoch': 1} {'type': 'loss', 'content': 0.1485534906387329, 'timestamp': '2025-10-01 04:14:47.537114', 'step': 1855, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:47.569698', 'step': 1855, 'epoch': 1} {'type': 'loss', 'content': 0.129713237285614, 'timestamp': '2025-10-01 04:14:47.593273', 'step': 1856, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:47.625686', 'step': 1856, 'epoch': 1} {'type': 'loss', 'content': 0.1485634297132492, 'timestamp': '2025-10-01 04:14:47.627630', 'step': 1857, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:47.666120', 'step': 1857, 'epoch': 1} {'type': 'loss', 'content': 0.071404829621315, 'timestamp': '2025-10-01 04:14:47.668140', 'step': 1858, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:47.708794', 'step': 1858, 'epoch': 1} {'type': 'loss', 'content': 0.10016946494579315, 'timestamp': '2025-10-01 04:14:47.710935', 'step': 1859, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:47.742677', 'step': 1859, 'epoch': 1} {'type': 'loss', 'content': 0.13908791542053223, 'timestamp': '2025-10-01 04:14:47.766245', 'step': 1860, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:14:47.799051', 'step': 1860, 'epoch': 1} {'type': 'loss', 'content': 0.13998842239379883, 'timestamp': '2025-10-01 04:14:47.801148', 'step': 1861, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:47.846892', 'step': 1861, 'epoch': 1} {'type': 'loss', 'content': 0.18910597264766693, 'timestamp': '2025-10-01 04:14:47.849040', 'step': 1862, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:14:47.882962', 'step': 1862, 'epoch': 1} {'type': 'loss', 'content': 0.1760013997554779, 'timestamp': '2025-10-01 04:14:47.885369', 'step': 1863, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:47.924178', 'step': 1863, 'epoch': 1} {'type': 'loss', 'content': 0.3261910378932953, 'timestamp': '2025-10-01 04:14:47.947750', 'step': 1864, 'epoch': 1} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 949202279808}], 'timestamp': '2025-10-01 04:14:59.169961', 'step': 1864, 'epoch': 1} {'type': 'pplx', 'content': 8645.188845906743, 'timestamp': '2025-10-01 04:14:59.173366', 'step': 1864, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:59.206585', 'step': 1864, 'epoch': 1} {'type': 'loss', 'content': 0.18083302676677704, 'timestamp': '2025-10-01 04:14:59.212787', 'step': 1865, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:59.251509', 'step': 1865, 'epoch': 1} {'type': 'loss', 'content': 0.21310433745384216, 'timestamp': '2025-10-01 04:14:59.263132', 'step': 1866, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:59.303043', 'step': 1866, 'epoch': 1} {'type': 'loss', 'content': 0.09019637107849121, 'timestamp': '2025-10-01 04:14:59.305098', 'step': 1867, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:59.343484', 'step': 1867, 'epoch': 1} {'type': 'loss', 'content': 0.26415061950683594, 'timestamp': '2025-10-01 04:14:59.367176', 'step': 1868, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:59.400092', 'step': 1868, 'epoch': 1} {'type': 'loss', 'content': 0.12127428501844406, 'timestamp': '2025-10-01 04:14:59.403414', 'step': 1869, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:59.470452', 'step': 1869, 'epoch': 1} {'type': 'loss', 'content': 0.15400119125843048, 'timestamp': '2025-10-01 04:14:59.472789', 'step': 1870, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:59.509573', 'step': 1870, 'epoch': 1} {'type': 'loss', 'content': 0.21568748354911804, 'timestamp': '2025-10-01 04:14:59.513206', 'step': 1871, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:59.546116', 'step': 1871, 'epoch': 1} {'type': 'loss', 'content': 0.18721191585063934, 'timestamp': '2025-10-01 04:14:59.570323', 'step': 1872, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:59.605067', 'step': 1872, 'epoch': 1} {'type': 'loss', 'content': 0.21695196628570557, 'timestamp': '2025-10-01 04:14:59.607669', 'step': 1873, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:14:59.639935', 'step': 1873, 'epoch': 1} {'type': 'loss', 'content': 0.13169623911380768, 'timestamp': '2025-10-01 04:14:59.641759', 'step': 1874, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:59.676200', 'step': 1874, 'epoch': 1} {'type': 'loss', 'content': 0.17058347165584564, 'timestamp': '2025-10-01 04:14:59.678147', 'step': 1875, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:59.715028', 'step': 1875, 'epoch': 1} {'type': 'loss', 'content': 0.12547729909420013, 'timestamp': '2025-10-01 04:14:59.738398', 'step': 1876, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:14:59.774407', 'step': 1876, 'epoch': 1} {'type': 'loss', 'content': 0.22949767112731934, 'timestamp': '2025-10-01 04:14:59.777002', 'step': 1877, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:59.828134', 'step': 1877, 'epoch': 1} {'type': 'loss', 'content': 0.13642634451389313, 'timestamp': '2025-10-01 04:14:59.830808', 'step': 1878, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:59.867307', 'step': 1878, 'epoch': 1} {'type': 'loss', 'content': 0.12366025149822235, 'timestamp': '2025-10-01 04:14:59.869775', 'step': 1879, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:14:59.903646', 'step': 1879, 'epoch': 1} {'type': 'loss', 'content': 0.17415007948875427, 'timestamp': '2025-10-01 04:14:59.927753', 'step': 1880, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:59.962154', 'step': 1880, 'epoch': 1} {'type': 'loss', 'content': 0.21079367399215698, 'timestamp': '2025-10-01 04:14:59.964564', 'step': 1881, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:14:59.998953', 'step': 1881, 'epoch': 1} {'type': 'loss', 'content': 0.10072766244411469, 'timestamp': '2025-10-01 04:15:00.001347', 'step': 1882, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:00.038566', 'step': 1882, 'epoch': 1} {'type': 'loss', 'content': 0.2001989781856537, 'timestamp': '2025-10-01 04:15:00.040927', 'step': 1883, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:00.074309', 'step': 1883, 'epoch': 1} {'type': 'loss', 'content': 0.17190136015415192, 'timestamp': '2025-10-01 04:15:00.098363', 'step': 1884, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:00.144534', 'step': 1884, 'epoch': 1} {'type': 'loss', 'content': 0.12245950847864151, 'timestamp': '2025-10-01 04:15:00.146754', 'step': 1885, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:00.181278', 'step': 1885, 'epoch': 1} {'type': 'loss', 'content': 0.1364104449748993, 'timestamp': '2025-10-01 04:15:00.183979', 'step': 1886, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:15:00.223591', 'step': 1886, 'epoch': 1} {'type': 'loss', 'content': 0.15864317119121552, 'timestamp': '2025-10-01 04:15:00.226011', 'step': 1887, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:15:00.261218', 'step': 1887, 'epoch': 1} {'type': 'loss', 'content': 0.20862896740436554, 'timestamp': '2025-10-01 04:15:00.284945', 'step': 1888, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:00.320497', 'step': 1888, 'epoch': 1} {'type': 'loss', 'content': 0.12567944824695587, 'timestamp': '2025-10-01 04:15:00.322529', 'step': 1889, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:00.355870', 'step': 1889, 'epoch': 1} {'type': 'loss', 'content': 0.23249787092208862, 'timestamp': '2025-10-01 04:15:00.358562', 'step': 1890, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:15:00.394358', 'step': 1890, 'epoch': 1} {'type': 'loss', 'content': 0.18436144292354584, 'timestamp': '2025-10-01 04:15:00.397080', 'step': 1891, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:00.431115', 'step': 1891, 'epoch': 1} {'type': 'loss', 'content': 0.2276606559753418, 'timestamp': '2025-10-01 04:15:00.455085', 'step': 1892, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:00.491859', 'step': 1892, 'epoch': 1} {'type': 'loss', 'content': 0.12032315135002136, 'timestamp': '2025-10-01 04:15:00.494060', 'step': 1893, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:00.541739', 'step': 1893, 'epoch': 1} {'type': 'loss', 'content': 0.18584971129894257, 'timestamp': '2025-10-01 04:15:00.543565', 'step': 1894, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:15:00.592628', 'step': 1894, 'epoch': 1} {'type': 'loss', 'content': 0.16197405755519867, 'timestamp': '2025-10-01 04:15:00.595134', 'step': 1895, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:00.642148', 'step': 1895, 'epoch': 1} {'type': 'loss', 'content': 0.2200554758310318, 'timestamp': '2025-10-01 04:15:00.665371', 'step': 1896, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:00.700119', 'step': 1896, 'epoch': 1} {'type': 'loss', 'content': 0.15944023430347443, 'timestamp': '2025-10-01 04:15:00.702525', 'step': 1897, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:00.746462', 'step': 1897, 'epoch': 1} {'type': 'loss', 'content': 0.11501864343881607, 'timestamp': '2025-10-01 04:15:00.749173', 'step': 1898, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:00.797898', 'step': 1898, 'epoch': 1} {'type': 'loss', 'content': 0.1325196474790573, 'timestamp': '2025-10-01 04:15:00.800037', 'step': 1899, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:00.847183', 'step': 1899, 'epoch': 1} {'type': 'loss', 'content': 0.1818004995584488, 'timestamp': '2025-10-01 04:15:00.870995', 'step': 1900, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:00.904798', 'step': 1900, 'epoch': 1} {'type': 'loss', 'content': 0.23304234445095062, 'timestamp': '2025-10-01 04:15:00.907334', 'step': 1901, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:00.942385', 'step': 1901, 'epoch': 1} {'type': 'loss', 'content': 0.2981346845626831, 'timestamp': '2025-10-01 04:15:00.944170', 'step': 1902, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:00.990925', 'step': 1902, 'epoch': 1} {'type': 'loss', 'content': 0.24135100841522217, 'timestamp': '2025-10-01 04:15:00.992985', 'step': 1903, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:01.027627', 'step': 1903, 'epoch': 1} {'type': 'loss', 'content': 0.27164074778556824, 'timestamp': '2025-10-01 04:15:01.051252', 'step': 1904, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:01.096095', 'step': 1904, 'epoch': 1} {'type': 'loss', 'content': 0.18921464681625366, 'timestamp': '2025-10-01 04:15:01.098227', 'step': 1905, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:01.133634', 'step': 1905, 'epoch': 1} {'type': 'loss', 'content': 0.2206116020679474, 'timestamp': '2025-10-01 04:15:01.137407', 'step': 1906, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:01.172331', 'step': 1906, 'epoch': 1} {'type': 'loss', 'content': 0.1919645518064499, 'timestamp': '2025-10-01 04:15:01.174378', 'step': 1907, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:01.210448', 'step': 1907, 'epoch': 1} {'type': 'loss', 'content': 0.23587602376937866, 'timestamp': '2025-10-01 04:15:01.234419', 'step': 1908, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:01.270999', 'step': 1908, 'epoch': 1} {'type': 'loss', 'content': 0.18819637596607208, 'timestamp': '2025-10-01 04:15:01.273005', 'step': 1909, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:15:01.307095', 'step': 1909, 'epoch': 1} {'type': 'loss', 'content': 0.19932827353477478, 'timestamp': '2025-10-01 04:15:01.309645', 'step': 1910, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:01.359234', 'step': 1910, 'epoch': 1} {'type': 'loss', 'content': 0.17181994020938873, 'timestamp': '2025-10-01 04:15:01.361365', 'step': 1911, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:01.400086', 'step': 1911, 'epoch': 1} {'type': 'loss', 'content': 0.23541268706321716, 'timestamp': '2025-10-01 04:15:01.423562', 'step': 1912, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:01.469561', 'step': 1912, 'epoch': 1} {'type': 'loss', 'content': 0.14764925837516785, 'timestamp': '2025-10-01 04:15:01.478832', 'step': 1913, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:01.527057', 'step': 1913, 'epoch': 1} {'type': 'loss', 'content': 0.17975230515003204, 'timestamp': '2025-10-01 04:15:01.529085', 'step': 1914, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:15:01.568114', 'step': 1914, 'epoch': 1} {'type': 'loss', 'content': 0.2777596116065979, 'timestamp': '2025-10-01 04:15:01.570259', 'step': 1915, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:15:01.605534', 'step': 1915, 'epoch': 1} {'type': 'loss', 'content': 0.15812444686889648, 'timestamp': '2025-10-01 04:15:01.629015', 'step': 1916, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:01.665073', 'step': 1916, 'epoch': 1} {'type': 'loss', 'content': 0.15210554003715515, 'timestamp': '2025-10-01 04:15:01.667157', 'step': 1917, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:15:01.703038', 'step': 1917, 'epoch': 1} {'type': 'loss', 'content': 0.17329257726669312, 'timestamp': '2025-10-01 04:15:01.705479', 'step': 1918, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:01.741305', 'step': 1918, 'epoch': 1} {'type': 'loss', 'content': 0.163172647356987, 'timestamp': '2025-10-01 04:15:01.743348', 'step': 1919, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:01.791536', 'step': 1919, 'epoch': 1} {'type': 'loss', 'content': 0.1937228888273239, 'timestamp': '2025-10-01 04:15:01.815451', 'step': 1920, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:01.849879', 'step': 1920, 'epoch': 1} {'type': 'loss', 'content': 0.22825995087623596, 'timestamp': '2025-10-01 04:15:01.851924', 'step': 1921, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:01.887278', 'step': 1921, 'epoch': 1} {'type': 'loss', 'content': 0.14693236351013184, 'timestamp': '2025-10-01 04:15:01.889311', 'step': 1922, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:01.924911', 'step': 1922, 'epoch': 1} {'type': 'loss', 'content': 0.15378625690937042, 'timestamp': '2025-10-01 04:15:01.926888', 'step': 1923, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:01.960306', 'step': 1923, 'epoch': 1} {'type': 'loss', 'content': 0.2115049660205841, 'timestamp': '2025-10-01 04:15:01.983911', 'step': 1924, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:02.017536', 'step': 1924, 'epoch': 1} {'type': 'loss', 'content': 0.22268639504909515, 'timestamp': '2025-10-01 04:15:02.019443', 'step': 1925, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:15:02.053612', 'step': 1925, 'epoch': 1} {'type': 'loss', 'content': 0.22716684639453888, 'timestamp': '2025-10-01 04:15:02.058705', 'step': 1926, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:15:02.093979', 'step': 1926, 'epoch': 1} {'type': 'loss', 'content': 0.11680269241333008, 'timestamp': '2025-10-01 04:15:02.096775', 'step': 1927, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:02.141082', 'step': 1927, 'epoch': 1} {'type': 'loss', 'content': 0.24264085292816162, 'timestamp': '2025-10-01 04:15:02.164353', 'step': 1928, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:02.204548', 'step': 1928, 'epoch': 1} {'type': 'loss', 'content': 0.1947851926088333, 'timestamp': '2025-10-01 04:15:02.206380', 'step': 1929, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:02.240278', 'step': 1929, 'epoch': 1} {'type': 'loss', 'content': 0.1902519017457962, 'timestamp': '2025-10-01 04:15:02.242291', 'step': 1930, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:15:02.278757', 'step': 1930, 'epoch': 1} {'type': 'loss', 'content': 0.19290922582149506, 'timestamp': '2025-10-01 04:15:02.280893', 'step': 1931, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:02.314060', 'step': 1931, 'epoch': 1} {'type': 'loss', 'content': 0.12679512798786163, 'timestamp': '2025-10-01 04:15:02.337372', 'step': 1932, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:02.369454', 'step': 1932, 'epoch': 1} {'type': 'loss', 'content': 0.226154625415802, 'timestamp': '2025-10-01 04:15:02.371534', 'step': 1933, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:02.409175', 'step': 1933, 'epoch': 1} {'type': 'loss', 'content': 0.17509573698043823, 'timestamp': '2025-10-01 04:15:02.411240', 'step': 1934, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:02.450725', 'step': 1934, 'epoch': 1} {'type': 'loss', 'content': 0.12091118842363358, 'timestamp': '2025-10-01 04:15:02.455938', 'step': 1935, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:02.507297', 'step': 1935, 'epoch': 1} {'type': 'loss', 'content': 0.21727126836776733, 'timestamp': '2025-10-01 04:15:02.531007', 'step': 1936, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:15:02.566851', 'step': 1936, 'epoch': 1} {'type': 'loss', 'content': 0.15170319378376007, 'timestamp': '2025-10-01 04:15:02.569776', 'step': 1937, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:02.605844', 'step': 1937, 'epoch': 1} {'type': 'loss', 'content': 0.25110960006713867, 'timestamp': '2025-10-01 04:15:02.607851', 'step': 1938, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:02.645120', 'step': 1938, 'epoch': 1} {'type': 'loss', 'content': 0.19769376516342163, 'timestamp': '2025-10-01 04:15:02.653482', 'step': 1939, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:02.708398', 'step': 1939, 'epoch': 1} {'type': 'loss', 'content': 0.16844066977500916, 'timestamp': '2025-10-01 04:15:02.742591', 'step': 1940, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:02.791143', 'step': 1940, 'epoch': 1} {'type': 'loss', 'content': 0.2663044035434723, 'timestamp': '2025-10-01 04:15:02.793376', 'step': 1941, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:02.827575', 'step': 1941, 'epoch': 1} {'type': 'loss', 'content': 0.189046710729599, 'timestamp': '2025-10-01 04:15:02.829871', 'step': 1942, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:02.867032', 'step': 1942, 'epoch': 1} {'type': 'loss', 'content': 0.147701233625412, 'timestamp': '2025-10-01 04:15:02.869068', 'step': 1943, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:02.906208', 'step': 1943, 'epoch': 1} {'type': 'loss', 'content': 0.22361229360103607, 'timestamp': '2025-10-01 04:15:02.929661', 'step': 1944, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:02.963604', 'step': 1944, 'epoch': 1} {'type': 'loss', 'content': 0.26984187960624695, 'timestamp': '2025-10-01 04:15:02.966385', 'step': 1945, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:03.017210', 'step': 1945, 'epoch': 1} {'type': 'loss', 'content': 0.13936276733875275, 'timestamp': '2025-10-01 04:15:03.018992', 'step': 1946, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:03.066565', 'step': 1946, 'epoch': 1} {'type': 'loss', 'content': 0.195598304271698, 'timestamp': '2025-10-01 04:15:03.068384', 'step': 1947, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:03.105221', 'step': 1947, 'epoch': 1} {'type': 'loss', 'content': 0.17058409750461578, 'timestamp': '2025-10-01 04:15:03.128696', 'step': 1948, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:03.178420', 'step': 1948, 'epoch': 1} {'type': 'loss', 'content': 0.3206891715526581, 'timestamp': '2025-10-01 04:15:03.180378', 'step': 1949, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:03.217531', 'step': 1949, 'epoch': 1} {'type': 'loss', 'content': 0.1854446530342102, 'timestamp': '2025-10-01 04:15:03.219630', 'step': 1950, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:03.255071', 'step': 1950, 'epoch': 1} {'type': 'loss', 'content': 0.2790420949459076, 'timestamp': '2025-10-01 04:15:03.257363', 'step': 1951, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:03.291367', 'step': 1951, 'epoch': 1} {'type': 'loss', 'content': 0.13133461773395538, 'timestamp': '2025-10-01 04:15:03.314922', 'step': 1952, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:15:03.354278', 'step': 1952, 'epoch': 1} {'type': 'loss', 'content': 0.17538145184516907, 'timestamp': '2025-10-01 04:15:03.356356', 'step': 1953, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:03.389547', 'step': 1953, 'epoch': 1} {'type': 'loss', 'content': 0.11640941351652145, 'timestamp': '2025-10-01 04:15:03.391771', 'step': 1954, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:03.429883', 'step': 1954, 'epoch': 1} {'type': 'loss', 'content': 0.136159747838974, 'timestamp': '2025-10-01 04:15:03.432175', 'step': 1955, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:03.469030', 'step': 1955, 'epoch': 1} {'type': 'loss', 'content': 0.15509328246116638, 'timestamp': '2025-10-01 04:15:03.492477', 'step': 1956, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:03.526000', 'step': 1956, 'epoch': 1} {'type': 'loss', 'content': 0.23197752237319946, 'timestamp': '2025-10-01 04:15:03.529370', 'step': 1957, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:03.567576', 'step': 1957, 'epoch': 1} {'type': 'loss', 'content': 0.19344189763069153, 'timestamp': '2025-10-01 04:15:03.572947', 'step': 1958, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:03.609525', 'step': 1958, 'epoch': 1} {'type': 'loss', 'content': 0.17266619205474854, 'timestamp': '2025-10-01 04:15:03.611734', 'step': 1959, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:03.647331', 'step': 1959, 'epoch': 1} {'type': 'loss', 'content': 0.17660754919052124, 'timestamp': '2025-10-01 04:15:03.670930', 'step': 1960, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:03.708377', 'step': 1960, 'epoch': 1} {'type': 'loss', 'content': 0.14888441562652588, 'timestamp': '2025-10-01 04:15:03.710438', 'step': 1961, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:15:03.761855', 'step': 1961, 'epoch': 1} {'type': 'loss', 'content': 0.14468413591384888, 'timestamp': '2025-10-01 04:15:03.764324', 'step': 1962, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:03.802383', 'step': 1962, 'epoch': 1} {'type': 'loss', 'content': 0.12028256803750992, 'timestamp': '2025-10-01 04:15:03.809289', 'step': 1963, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:03.844007', 'step': 1963, 'epoch': 1} {'type': 'loss', 'content': 0.12247105687856674, 'timestamp': '2025-10-01 04:15:03.867845', 'step': 1964, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:03.907025', 'step': 1964, 'epoch': 1} {'type': 'loss', 'content': 0.1706765741109848, 'timestamp': '2025-10-01 04:15:03.909137', 'step': 1965, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:03.947799', 'step': 1965, 'epoch': 1} {'type': 'loss', 'content': 0.123264379799366, 'timestamp': '2025-10-01 04:15:03.949984', 'step': 1966, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:04.000119', 'step': 1966, 'epoch': 1} {'type': 'loss', 'content': 0.37578240036964417, 'timestamp': '2025-10-01 04:15:04.002305', 'step': 1967, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:04.038907', 'step': 1967, 'epoch': 1} {'type': 'loss', 'content': 0.14515429735183716, 'timestamp': '2025-10-01 04:15:04.062602', 'step': 1968, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:15:04.104753', 'step': 1968, 'epoch': 1} {'type': 'loss', 'content': 0.16339723765850067, 'timestamp': '2025-10-01 04:15:04.106912', 'step': 1969, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:04.143172', 'step': 1969, 'epoch': 1} {'type': 'loss', 'content': 0.1443866491317749, 'timestamp': '2025-10-01 04:15:04.145177', 'step': 1970, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:04.185173', 'step': 1970, 'epoch': 1} {'type': 'loss', 'content': 0.2622092068195343, 'timestamp': '2025-10-01 04:15:04.187306', 'step': 1971, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:04.221992', 'step': 1971, 'epoch': 1} {'type': 'loss', 'content': 0.12096545100212097, 'timestamp': '2025-10-01 04:15:04.245788', 'step': 1972, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:04.281910', 'step': 1972, 'epoch': 1} {'type': 'loss', 'content': 0.14275363087654114, 'timestamp': '2025-10-01 04:15:04.284212', 'step': 1973, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:04.321917', 'step': 1973, 'epoch': 1} {'type': 'loss', 'content': 0.20746727287769318, 'timestamp': '2025-10-01 04:15:04.324085', 'step': 1974, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:04.363105', 'step': 1974, 'epoch': 1} {'type': 'loss', 'content': 0.1678464561700821, 'timestamp': '2025-10-01 04:15:04.365157', 'step': 1975, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:15:04.399419', 'step': 1975, 'epoch': 1} {'type': 'loss', 'content': 0.15576067566871643, 'timestamp': '2025-10-01 04:15:04.423012', 'step': 1976, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:04.458199', 'step': 1976, 'epoch': 1} {'type': 'loss', 'content': 0.1756669133901596, 'timestamp': '2025-10-01 04:15:04.460189', 'step': 1977, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:04.497661', 'step': 1977, 'epoch': 1} {'type': 'loss', 'content': 0.19455821812152863, 'timestamp': '2025-10-01 04:15:04.499748', 'step': 1978, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:04.549124', 'step': 1978, 'epoch': 1} {'type': 'loss', 'content': 0.26136279106140137, 'timestamp': '2025-10-01 04:15:04.551145', 'step': 1979, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:04.586240', 'step': 1979, 'epoch': 1} {'type': 'loss', 'content': 0.14867901802062988, 'timestamp': '2025-10-01 04:15:04.609680', 'step': 1980, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:04.650686', 'step': 1980, 'epoch': 1} {'type': 'loss', 'content': 0.19290918111801147, 'timestamp': '2025-10-01 04:15:04.652860', 'step': 1981, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:15:04.686694', 'step': 1981, 'epoch': 1} {'type': 'loss', 'content': 0.25440800189971924, 'timestamp': '2025-10-01 04:15:04.689138', 'step': 1982, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:04.727735', 'step': 1982, 'epoch': 1} {'type': 'loss', 'content': 0.21816743910312653, 'timestamp': '2025-10-01 04:15:04.729778', 'step': 1983, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:04.767561', 'step': 1983, 'epoch': 1} {'type': 'loss', 'content': 0.24148017168045044, 'timestamp': '2025-10-01 04:15:04.791199', 'step': 1984, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:04.824883', 'step': 1984, 'epoch': 1} {'type': 'loss', 'content': 0.13317424058914185, 'timestamp': '2025-10-01 04:15:04.827111', 'step': 1985, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:04.861917', 'step': 1985, 'epoch': 1} {'type': 'loss', 'content': 0.14364881813526154, 'timestamp': '2025-10-01 04:15:04.863974', 'step': 1986, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:04.908290', 'step': 1986, 'epoch': 1} {'type': 'loss', 'content': 0.23952262103557587, 'timestamp': '2025-10-01 04:15:04.910541', 'step': 1987, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:04.947576', 'step': 1987, 'epoch': 1} {'type': 'loss', 'content': 0.20860598981380463, 'timestamp': '2025-10-01 04:15:04.971407', 'step': 1988, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:05.014695', 'step': 1988, 'epoch': 1} {'type': 'loss', 'content': 0.2636886239051819, 'timestamp': '2025-10-01 04:15:05.016880', 'step': 1989, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:05.065687', 'step': 1989, 'epoch': 1} {'type': 'loss', 'content': 0.14396050572395325, 'timestamp': '2025-10-01 04:15:05.067579', 'step': 1990, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:15:05.114271', 'step': 1990, 'epoch': 1} {'type': 'loss', 'content': 0.17115387320518494, 'timestamp': '2025-10-01 04:15:05.117088', 'step': 1991, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:05.154072', 'step': 1991, 'epoch': 1} {'type': 'loss', 'content': 0.16152457892894745, 'timestamp': '2025-10-01 04:15:05.177774', 'step': 1992, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:05.212344', 'step': 1992, 'epoch': 1} {'type': 'loss', 'content': 0.16897179186344147, 'timestamp': '2025-10-01 04:15:05.214475', 'step': 1993, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:05.270112', 'step': 1993, 'epoch': 1} {'type': 'loss', 'content': 0.19188816845417023, 'timestamp': '2025-10-01 04:15:05.272117', 'step': 1994, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:05.305575', 'step': 1994, 'epoch': 1} {'type': 'loss', 'content': 0.2179824262857437, 'timestamp': '2025-10-01 04:15:05.307608', 'step': 1995, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:05.351850', 'step': 1995, 'epoch': 1} {'type': 'loss', 'content': 0.1688559502363205, 'timestamp': '2025-10-01 04:15:05.375351', 'step': 1996, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:15:05.409261', 'step': 1996, 'epoch': 1} {'type': 'loss', 'content': 0.2094745934009552, 'timestamp': '2025-10-01 04:15:05.411297', 'step': 1997, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:05.445743', 'step': 1997, 'epoch': 1} {'type': 'loss', 'content': 0.256290078163147, 'timestamp': '2025-10-01 04:15:05.447846', 'step': 1998, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:05.493683', 'step': 1998, 'epoch': 1} {'type': 'loss', 'content': 0.15378563106060028, 'timestamp': '2025-10-01 04:15:05.495617', 'step': 1999, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:05.529684', 'step': 1999, 'epoch': 1} {'type': 'loss', 'content': 0.16631008684635162, 'timestamp': '2025-10-01 04:15:05.553124', 'step': 2000, 'epoch': 1} {'type': 'info', 'content': 'Checkpoint saved at step 2000', 'timestamp': '2025-10-01 04:15:10.653834', 'step': 2000, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:10.701420', 'step': 2000, 'epoch': 1} {'type': 'loss', 'content': 0.14658938348293304, 'timestamp': '2025-10-01 04:15:10.703681', 'step': 2001, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:10.748601', 'step': 2001, 'epoch': 1} {'type': 'loss', 'content': 0.24329398572444916, 'timestamp': '2025-10-01 04:15:10.751149', 'step': 2002, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:10.798208', 'step': 2002, 'epoch': 1} {'type': 'loss', 'content': 0.19199037551879883, 'timestamp': '2025-10-01 04:15:10.800531', 'step': 2003, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:15:10.835568', 'step': 2003, 'epoch': 1} {'type': 'loss', 'content': 0.215931236743927, 'timestamp': '2025-10-01 04:15:10.861088', 'step': 2004, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:10.912245', 'step': 2004, 'epoch': 1} {'type': 'loss', 'content': 0.13528607785701752, 'timestamp': '2025-10-01 04:15:10.914185', 'step': 2005, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:10.966429', 'step': 2005, 'epoch': 1} {'type': 'loss', 'content': 0.1414584070444107, 'timestamp': '2025-10-01 04:15:10.968911', 'step': 2006, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:15:11.006682', 'step': 2006, 'epoch': 1} {'type': 'loss', 'content': 0.24296706914901733, 'timestamp': '2025-10-01 04:15:11.009246', 'step': 2007, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:15:11.058121', 'step': 2007, 'epoch': 1} {'type': 'loss', 'content': 0.20034325122833252, 'timestamp': '2025-10-01 04:15:11.081965', 'step': 2008, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:11.124998', 'step': 2008, 'epoch': 1} {'type': 'loss', 'content': 0.11708991229534149, 'timestamp': '2025-10-01 04:15:11.128644', 'step': 2009, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:11.165806', 'step': 2009, 'epoch': 1} {'type': 'loss', 'content': 0.19825150072574615, 'timestamp': '2025-10-01 04:15:11.168221', 'step': 2010, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:11.204387', 'step': 2010, 'epoch': 1} {'type': 'loss', 'content': 0.11740318685770035, 'timestamp': '2025-10-01 04:15:11.206421', 'step': 2011, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:11.241551', 'step': 2011, 'epoch': 1} {'type': 'loss', 'content': 0.17356280982494354, 'timestamp': '2025-10-01 04:15:11.266549', 'step': 2012, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:15:11.301680', 'step': 2012, 'epoch': 1} {'type': 'loss', 'content': 0.31685855984687805, 'timestamp': '2025-10-01 04:15:11.303683', 'step': 2013, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:11.339145', 'step': 2013, 'epoch': 1} {'type': 'loss', 'content': 0.24968582391738892, 'timestamp': '2025-10-01 04:15:11.341897', 'step': 2014, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:11.375154', 'step': 2014, 'epoch': 1} {'type': 'loss', 'content': 0.13337910175323486, 'timestamp': '2025-10-01 04:15:11.377574', 'step': 2015, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:11.413287', 'step': 2015, 'epoch': 1} {'type': 'loss', 'content': 0.09750770032405853, 'timestamp': '2025-10-01 04:15:11.437004', 'step': 2016, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:11.473848', 'step': 2016, 'epoch': 1} {'type': 'loss', 'content': 0.11011125147342682, 'timestamp': '2025-10-01 04:15:11.476160', 'step': 2017, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:15:11.509472', 'step': 2017, 'epoch': 1} {'type': 'loss', 'content': 0.18647965788841248, 'timestamp': '2025-10-01 04:15:11.512210', 'step': 2018, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:11.548510', 'step': 2018, 'epoch': 1} {'type': 'loss', 'content': 0.1619243323802948, 'timestamp': '2025-10-01 04:15:11.550952', 'step': 2019, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:11.585077', 'step': 2019, 'epoch': 1} {'type': 'loss', 'content': 0.2844645082950592, 'timestamp': '2025-10-01 04:15:11.613178', 'step': 2020, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:11.674584', 'step': 2020, 'epoch': 1} {'type': 'loss', 'content': 0.19815117120742798, 'timestamp': '2025-10-01 04:15:11.676597', 'step': 2021, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:11.710439', 'step': 2021, 'epoch': 1} {'type': 'loss', 'content': 0.16952632367610931, 'timestamp': '2025-10-01 04:15:11.712454', 'step': 2022, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:11.748454', 'step': 2022, 'epoch': 1} {'type': 'loss', 'content': 0.23925401270389557, 'timestamp': '2025-10-01 04:15:11.751208', 'step': 2023, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:11.797990', 'step': 2023, 'epoch': 1} {'type': 'loss', 'content': 0.15464305877685547, 'timestamp': '2025-10-01 04:15:11.821346', 'step': 2024, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:15:11.855365', 'step': 2024, 'epoch': 1} {'type': 'loss', 'content': 0.23557989299297333, 'timestamp': '2025-10-01 04:15:11.857508', 'step': 2025, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:15:11.894752', 'step': 2025, 'epoch': 1} {'type': 'loss', 'content': 0.22923582792282104, 'timestamp': '2025-10-01 04:15:11.896697', 'step': 2026, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:11.943135', 'step': 2026, 'epoch': 1} {'type': 'loss', 'content': 0.16552451252937317, 'timestamp': '2025-10-01 04:15:11.945373', 'step': 2027, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:11.978755', 'step': 2027, 'epoch': 1} {'type': 'loss', 'content': 0.126480832695961, 'timestamp': '2025-10-01 04:15:12.006562', 'step': 2028, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:12.039359', 'step': 2028, 'epoch': 1} {'type': 'loss', 'content': 0.16125619411468506, 'timestamp': '2025-10-01 04:15:12.041786', 'step': 2029, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:12.077335', 'step': 2029, 'epoch': 1} {'type': 'loss', 'content': 0.17085042595863342, 'timestamp': '2025-10-01 04:15:12.079694', 'step': 2030, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:12.113631', 'step': 2030, 'epoch': 1} {'type': 'loss', 'content': 0.11644431203603745, 'timestamp': '2025-10-01 04:15:12.115993', 'step': 2031, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:12.150945', 'step': 2031, 'epoch': 1} {'type': 'loss', 'content': 0.14552590250968933, 'timestamp': '2025-10-01 04:15:12.174374', 'step': 2032, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:12.208908', 'step': 2032, 'epoch': 1} {'type': 'loss', 'content': 0.3090848922729492, 'timestamp': '2025-10-01 04:15:12.210996', 'step': 2033, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:12.245307', 'step': 2033, 'epoch': 1} {'type': 'loss', 'content': 0.153142511844635, 'timestamp': '2025-10-01 04:15:12.247397', 'step': 2034, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:12.301460', 'step': 2034, 'epoch': 1} {'type': 'loss', 'content': 0.11170695722103119, 'timestamp': '2025-10-01 04:15:12.303584', 'step': 2035, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:12.338833', 'step': 2035, 'epoch': 1} {'type': 'loss', 'content': 0.25931987166404724, 'timestamp': '2025-10-01 04:15:12.362411', 'step': 2036, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:12.397998', 'step': 2036, 'epoch': 1} {'type': 'loss', 'content': 0.16045448184013367, 'timestamp': '2025-10-01 04:15:12.400014', 'step': 2037, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:12.439831', 'step': 2037, 'epoch': 1} {'type': 'loss', 'content': 0.2769172191619873, 'timestamp': '2025-10-01 04:15:12.441983', 'step': 2038, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:12.488794', 'step': 2038, 'epoch': 1} {'type': 'loss', 'content': 0.2133660763502121, 'timestamp': '2025-10-01 04:15:12.490740', 'step': 2039, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:12.525278', 'step': 2039, 'epoch': 1} {'type': 'loss', 'content': 0.14098691940307617, 'timestamp': '2025-10-01 04:15:12.548885', 'step': 2040, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:12.589139', 'step': 2040, 'epoch': 1} {'type': 'loss', 'content': 0.18072833120822906, 'timestamp': '2025-10-01 04:15:12.590946', 'step': 2041, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:12.627390', 'step': 2041, 'epoch': 1} {'type': 'loss', 'content': 0.13222090899944305, 'timestamp': '2025-10-01 04:15:12.629421', 'step': 2042, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:12.667366', 'step': 2042, 'epoch': 1} {'type': 'loss', 'content': 0.20944108068943024, 'timestamp': '2025-10-01 04:15:12.669664', 'step': 2043, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:15:12.702025', 'step': 2043, 'epoch': 1} {'type': 'loss', 'content': 0.09474455565214157, 'timestamp': '2025-10-01 04:15:12.725854', 'step': 2044, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:15:12.765294', 'step': 2044, 'epoch': 1} {'type': 'loss', 'content': 0.2049545794725418, 'timestamp': '2025-10-01 04:15:12.774642', 'step': 2045, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:12.807845', 'step': 2045, 'epoch': 1} {'type': 'loss', 'content': 0.08757565915584564, 'timestamp': '2025-10-01 04:15:12.810036', 'step': 2046, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:12.865308', 'step': 2046, 'epoch': 1} {'type': 'loss', 'content': 0.17946964502334595, 'timestamp': '2025-10-01 04:15:12.867776', 'step': 2047, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:12.901955', 'step': 2047, 'epoch': 1} {'type': 'loss', 'content': 0.2141171544790268, 'timestamp': '2025-10-01 04:15:12.925809', 'step': 2048, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:12.961671', 'step': 2048, 'epoch': 1} {'type': 'loss', 'content': 0.11732586473226547, 'timestamp': '2025-10-01 04:15:12.963591', 'step': 2049, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:13.008810', 'step': 2049, 'epoch': 1} {'type': 'loss', 'content': 0.18266843259334564, 'timestamp': '2025-10-01 04:15:13.011027', 'step': 2050, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:13.044392', 'step': 2050, 'epoch': 1} {'type': 'loss', 'content': 0.19109120965003967, 'timestamp': '2025-10-01 04:15:13.046535', 'step': 2051, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:15:13.080236', 'step': 2051, 'epoch': 1} {'type': 'loss', 'content': 0.1622334122657776, 'timestamp': '2025-10-01 04:15:13.103748', 'step': 2052, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:13.136174', 'step': 2052, 'epoch': 1} {'type': 'loss', 'content': 0.24314606189727783, 'timestamp': '2025-10-01 04:15:13.138350', 'step': 2053, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:15:13.172859', 'step': 2053, 'epoch': 1} {'type': 'loss', 'content': 0.14860627055168152, 'timestamp': '2025-10-01 04:15:13.175376', 'step': 2054, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:13.209701', 'step': 2054, 'epoch': 1} {'type': 'loss', 'content': 0.11847484111785889, 'timestamp': '2025-10-01 04:15:13.211692', 'step': 2055, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:13.247792', 'step': 2055, 'epoch': 1} {'type': 'loss', 'content': 0.12525750696659088, 'timestamp': '2025-10-01 04:15:13.271496', 'step': 2056, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:13.307884', 'step': 2056, 'epoch': 1} {'type': 'loss', 'content': 0.13005399703979492, 'timestamp': '2025-10-01 04:15:13.309990', 'step': 2057, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:15:13.343614', 'step': 2057, 'epoch': 1} {'type': 'loss', 'content': 0.22745761275291443, 'timestamp': '2025-10-01 04:15:13.346181', 'step': 2058, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:13.383437', 'step': 2058, 'epoch': 1} {'type': 'loss', 'content': 0.13203179836273193, 'timestamp': '2025-10-01 04:15:13.385687', 'step': 2059, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:13.424058', 'step': 2059, 'epoch': 1} {'type': 'loss', 'content': 0.121748186647892, 'timestamp': '2025-10-01 04:15:13.448580', 'step': 2060, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:13.490548', 'step': 2060, 'epoch': 1} {'type': 'loss', 'content': 0.14516514539718628, 'timestamp': '2025-10-01 04:15:13.492613', 'step': 2061, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:13.528999', 'step': 2061, 'epoch': 1} {'type': 'loss', 'content': 0.1393069177865982, 'timestamp': '2025-10-01 04:15:13.531015', 'step': 2062, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:13.567526', 'step': 2062, 'epoch': 1} {'type': 'loss', 'content': 0.18690305948257446, 'timestamp': '2025-10-01 04:15:13.571040', 'step': 2063, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:13.603607', 'step': 2063, 'epoch': 1} {'type': 'loss', 'content': 0.246782124042511, 'timestamp': '2025-10-01 04:15:13.627331', 'step': 2064, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:15:13.679422', 'step': 2064, 'epoch': 1} {'type': 'loss', 'content': 0.1688985526561737, 'timestamp': '2025-10-01 04:15:13.681510', 'step': 2065, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:15:13.714746', 'step': 2065, 'epoch': 1} {'type': 'loss', 'content': 0.1607569009065628, 'timestamp': '2025-10-01 04:15:13.717219', 'step': 2066, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:13.751003', 'step': 2066, 'epoch': 1} {'type': 'loss', 'content': 0.19157062470912933, 'timestamp': '2025-10-01 04:15:13.752934', 'step': 2067, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:13.786995', 'step': 2067, 'epoch': 1} {'type': 'loss', 'content': 0.1552312970161438, 'timestamp': '2025-10-01 04:15:13.810424', 'step': 2068, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:13.843629', 'step': 2068, 'epoch': 1} {'type': 'loss', 'content': 0.21067267656326294, 'timestamp': '2025-10-01 04:15:13.845810', 'step': 2069, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:13.879997', 'step': 2069, 'epoch': 1} {'type': 'loss', 'content': 0.16339871287345886, 'timestamp': '2025-10-01 04:15:13.890398', 'step': 2070, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:13.923890', 'step': 2070, 'epoch': 1} {'type': 'loss', 'content': 0.23965702950954437, 'timestamp': '2025-10-01 04:15:13.925874', 'step': 2071, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:13.965729', 'step': 2071, 'epoch': 1} {'type': 'loss', 'content': 0.2024948000907898, 'timestamp': '2025-10-01 04:15:13.990673', 'step': 2072, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:14.030327', 'step': 2072, 'epoch': 1} {'type': 'loss', 'content': 0.14601187407970428, 'timestamp': '2025-10-01 04:15:14.032479', 'step': 2073, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:15:14.072646', 'step': 2073, 'epoch': 1} {'type': 'loss', 'content': 0.13887371122837067, 'timestamp': '2025-10-01 04:15:14.074973', 'step': 2074, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:14.113079', 'step': 2074, 'epoch': 1} {'type': 'loss', 'content': 0.1307569444179535, 'timestamp': '2025-10-01 04:15:14.115543', 'step': 2075, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:14.147874', 'step': 2075, 'epoch': 1} {'type': 'loss', 'content': 0.12659865617752075, 'timestamp': '2025-10-01 04:15:14.171738', 'step': 2076, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:14.207381', 'step': 2076, 'epoch': 1} {'type': 'loss', 'content': 0.291368305683136, 'timestamp': '2025-10-01 04:15:14.209740', 'step': 2077, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:14.242943', 'step': 2077, 'epoch': 1} {'type': 'loss', 'content': 0.20642848312854767, 'timestamp': '2025-10-01 04:15:14.245240', 'step': 2078, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:15:14.280143', 'step': 2078, 'epoch': 1} {'type': 'loss', 'content': 0.13205817341804504, 'timestamp': '2025-10-01 04:15:14.282688', 'step': 2079, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:14.327271', 'step': 2079, 'epoch': 1} {'type': 'loss', 'content': 0.1875082105398178, 'timestamp': '2025-10-01 04:15:14.350860', 'step': 2080, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:14.395056', 'step': 2080, 'epoch': 1} {'type': 'loss', 'content': 0.19009554386138916, 'timestamp': '2025-10-01 04:15:14.396983', 'step': 2081, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:14.431774', 'step': 2081, 'epoch': 1} {'type': 'loss', 'content': 0.12121829390525818, 'timestamp': '2025-10-01 04:15:14.433947', 'step': 2082, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:14.476179', 'step': 2082, 'epoch': 1} {'type': 'loss', 'content': 0.14795450866222382, 'timestamp': '2025-10-01 04:15:14.483468', 'step': 2083, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:14.516562', 'step': 2083, 'epoch': 1} {'type': 'loss', 'content': 0.11997538805007935, 'timestamp': '2025-10-01 04:15:14.540095', 'step': 2084, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:14.572806', 'step': 2084, 'epoch': 1} {'type': 'loss', 'content': 0.14086602628231049, 'timestamp': '2025-10-01 04:15:14.575013', 'step': 2085, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:14.621687', 'step': 2085, 'epoch': 1} {'type': 'loss', 'content': 0.13020911812782288, 'timestamp': '2025-10-01 04:15:14.626242', 'step': 2086, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:14.662508', 'step': 2086, 'epoch': 1} {'type': 'loss', 'content': 0.21018852293491364, 'timestamp': '2025-10-01 04:15:14.664750', 'step': 2087, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:14.698582', 'step': 2087, 'epoch': 1} {'type': 'loss', 'content': 0.12141542136669159, 'timestamp': '2025-10-01 04:15:14.722239', 'step': 2088, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:14.757693', 'step': 2088, 'epoch': 1} {'type': 'loss', 'content': 0.1640748232603073, 'timestamp': '2025-10-01 04:15:14.762999', 'step': 2089, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:14.798642', 'step': 2089, 'epoch': 1} {'type': 'loss', 'content': 0.1255234330892563, 'timestamp': '2025-10-01 04:15:14.800737', 'step': 2090, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:14.832579', 'step': 2090, 'epoch': 1} {'type': 'loss', 'content': 0.20463478565216064, 'timestamp': '2025-10-01 04:15:14.834772', 'step': 2091, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:14.868922', 'step': 2091, 'epoch': 1} {'type': 'loss', 'content': 0.20973093807697296, 'timestamp': '2025-10-01 04:15:14.892402', 'step': 2092, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:14.924560', 'step': 2092, 'epoch': 1} {'type': 'loss', 'content': 0.28545549511909485, 'timestamp': '2025-10-01 04:15:14.926632', 'step': 2093, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:14.959587', 'step': 2093, 'epoch': 1} {'type': 'loss', 'content': 0.17397353053092957, 'timestamp': '2025-10-01 04:15:14.961800', 'step': 2094, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:15.010642', 'step': 2094, 'epoch': 1} {'type': 'loss', 'content': 0.13294057548046112, 'timestamp': '2025-10-01 04:15:15.012729', 'step': 2095, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:15.048404', 'step': 2095, 'epoch': 1} {'type': 'loss', 'content': 0.11502920836210251, 'timestamp': '2025-10-01 04:15:15.072246', 'step': 2096, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:15.106538', 'step': 2096, 'epoch': 1} {'type': 'loss', 'content': 0.18707947432994843, 'timestamp': '2025-10-01 04:15:15.108847', 'step': 2097, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:15.144929', 'step': 2097, 'epoch': 1} {'type': 'loss', 'content': 0.25165122747421265, 'timestamp': '2025-10-01 04:15:15.148238', 'step': 2098, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:15.196185', 'step': 2098, 'epoch': 1} {'type': 'loss', 'content': 0.17493125796318054, 'timestamp': '2025-10-01 04:15:15.198583', 'step': 2099, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:15.241575', 'step': 2099, 'epoch': 1} {'type': 'loss', 'content': 0.15413908660411835, 'timestamp': '2025-10-01 04:15:15.265017', 'step': 2100, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:15.299110', 'step': 2100, 'epoch': 1} {'type': 'loss', 'content': 0.1709115207195282, 'timestamp': '2025-10-01 04:15:15.301205', 'step': 2101, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:15.351687', 'step': 2101, 'epoch': 1} {'type': 'loss', 'content': 0.2224733680486679, 'timestamp': '2025-10-01 04:15:15.354152', 'step': 2102, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:15:15.398060', 'step': 2102, 'epoch': 1} {'type': 'loss', 'content': 0.1860920786857605, 'timestamp': '2025-10-01 04:15:15.400523', 'step': 2103, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:15:15.435493', 'step': 2103, 'epoch': 1} {'type': 'loss', 'content': 0.15696978569030762, 'timestamp': '2025-10-01 04:15:15.459217', 'step': 2104, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:15.497738', 'step': 2104, 'epoch': 1} {'type': 'loss', 'content': 0.14613059163093567, 'timestamp': '2025-10-01 04:15:15.501377', 'step': 2105, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:15.535767', 'step': 2105, 'epoch': 1} {'type': 'loss', 'content': 0.19217531383037567, 'timestamp': '2025-10-01 04:15:15.538072', 'step': 2106, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:15.570846', 'step': 2106, 'epoch': 1} {'type': 'loss', 'content': 0.1775154024362564, 'timestamp': '2025-10-01 04:15:15.572886', 'step': 2107, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:15:15.604521', 'step': 2107, 'epoch': 1} {'type': 'loss', 'content': 0.20439483225345612, 'timestamp': '2025-10-01 04:15:15.628147', 'step': 2108, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:15.662077', 'step': 2108, 'epoch': 1} {'type': 'loss', 'content': 0.1976197212934494, 'timestamp': '2025-10-01 04:15:15.664054', 'step': 2109, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:15.706976', 'step': 2109, 'epoch': 1} {'type': 'loss', 'content': 0.1163078099489212, 'timestamp': '2025-10-01 04:15:15.708999', 'step': 2110, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:15.741800', 'step': 2110, 'epoch': 1} {'type': 'loss', 'content': 0.2107880711555481, 'timestamp': '2025-10-01 04:15:15.743901', 'step': 2111, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:15.777739', 'step': 2111, 'epoch': 1} {'type': 'loss', 'content': 0.16851553320884705, 'timestamp': '2025-10-01 04:15:15.802258', 'step': 2112, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:15.835691', 'step': 2112, 'epoch': 1} {'type': 'loss', 'content': 0.15074561536312103, 'timestamp': '2025-10-01 04:15:15.837955', 'step': 2113, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:15.872801', 'step': 2113, 'epoch': 1} {'type': 'loss', 'content': 0.17078673839569092, 'timestamp': '2025-10-01 04:15:15.875632', 'step': 2114, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:15:15.926739', 'step': 2114, 'epoch': 1} {'type': 'loss', 'content': 0.3169131577014923, 'timestamp': '2025-10-01 04:15:15.929534', 'step': 2115, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:15.969845', 'step': 2115, 'epoch': 1} {'type': 'loss', 'content': 0.23804259300231934, 'timestamp': '2025-10-01 04:15:15.993785', 'step': 2116, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:15:16.042207', 'step': 2116, 'epoch': 1} {'type': 'loss', 'content': 0.17457014322280884, 'timestamp': '2025-10-01 04:15:16.044571', 'step': 2117, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:16.079906', 'step': 2117, 'epoch': 1} {'type': 'loss', 'content': 0.15018661320209503, 'timestamp': '2025-10-01 04:15:16.090474', 'step': 2118, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:16.129902', 'step': 2118, 'epoch': 1} {'type': 'loss', 'content': 0.22301578521728516, 'timestamp': '2025-10-01 04:15:16.132319', 'step': 2119, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:16.167153', 'step': 2119, 'epoch': 1} {'type': 'loss', 'content': 0.16602590680122375, 'timestamp': '2025-10-01 04:15:16.191251', 'step': 2120, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:16.227001', 'step': 2120, 'epoch': 1} {'type': 'loss', 'content': 0.1335815042257309, 'timestamp': '2025-10-01 04:15:16.229425', 'step': 2121, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:15:16.276868', 'step': 2121, 'epoch': 1} {'type': 'loss', 'content': 0.1736229509115219, 'timestamp': '2025-10-01 04:15:16.279340', 'step': 2122, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:16.313849', 'step': 2122, 'epoch': 1} {'type': 'loss', 'content': 0.1793147176504135, 'timestamp': '2025-10-01 04:15:16.316431', 'step': 2123, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:16.349461', 'step': 2123, 'epoch': 1} {'type': 'loss', 'content': 0.13763661682605743, 'timestamp': '2025-10-01 04:15:16.373347', 'step': 2124, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:16.408316', 'step': 2124, 'epoch': 1} {'type': 'loss', 'content': 0.1773521602153778, 'timestamp': '2025-10-01 04:15:16.411727', 'step': 2125, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:16.448834', 'step': 2125, 'epoch': 1} {'type': 'loss', 'content': 0.17314301431179047, 'timestamp': '2025-10-01 04:15:16.450751', 'step': 2126, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:16.498031', 'step': 2126, 'epoch': 1} {'type': 'loss', 'content': 0.13018816709518433, 'timestamp': '2025-10-01 04:15:16.501339', 'step': 2127, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:16.535231', 'step': 2127, 'epoch': 1} {'type': 'loss', 'content': 0.2871648669242859, 'timestamp': '2025-10-01 04:15:16.559537', 'step': 2128, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:15:16.617728', 'step': 2128, 'epoch': 1} {'type': 'loss', 'content': 0.22525103390216827, 'timestamp': '2025-10-01 04:15:16.621090', 'step': 2129, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:16.658068', 'step': 2129, 'epoch': 1} {'type': 'loss', 'content': 0.12552358210086823, 'timestamp': '2025-10-01 04:15:16.666894', 'step': 2130, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:16.702710', 'step': 2130, 'epoch': 1} {'type': 'loss', 'content': 0.1473998725414276, 'timestamp': '2025-10-01 04:15:16.705201', 'step': 2131, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:16.741979', 'step': 2131, 'epoch': 1} {'type': 'loss', 'content': 0.23239392042160034, 'timestamp': '2025-10-01 04:15:16.765768', 'step': 2132, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:15:16.803925', 'step': 2132, 'epoch': 1} {'type': 'loss', 'content': 0.1284743845462799, 'timestamp': '2025-10-01 04:15:16.806370', 'step': 2133, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:16.841329', 'step': 2133, 'epoch': 1} {'type': 'loss', 'content': 0.11452551931142807, 'timestamp': '2025-10-01 04:15:16.851790', 'step': 2134, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:15:16.886986', 'step': 2134, 'epoch': 1} {'type': 'loss', 'content': 0.3505009412765503, 'timestamp': '2025-10-01 04:15:16.889429', 'step': 2135, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:16.923538', 'step': 2135, 'epoch': 1} {'type': 'loss', 'content': 0.08943980932235718, 'timestamp': '2025-10-01 04:15:16.947248', 'step': 2136, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:16.982054', 'step': 2136, 'epoch': 1} {'type': 'loss', 'content': 0.17113417387008667, 'timestamp': '2025-10-01 04:15:16.984235', 'step': 2137, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:17.020094', 'step': 2137, 'epoch': 1} {'type': 'loss', 'content': 0.20253463089466095, 'timestamp': '2025-10-01 04:15:17.022194', 'step': 2138, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:17.056104', 'step': 2138, 'epoch': 1} {'type': 'loss', 'content': 0.08863382041454315, 'timestamp': '2025-10-01 04:15:17.058102', 'step': 2139, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:17.113904', 'step': 2139, 'epoch': 1} {'type': 'loss', 'content': 0.17426007986068726, 'timestamp': '2025-10-01 04:15:17.137695', 'step': 2140, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:17.175483', 'step': 2140, 'epoch': 1} {'type': 'loss', 'content': 0.19724735617637634, 'timestamp': '2025-10-01 04:15:17.177952', 'step': 2141, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:17.214196', 'step': 2141, 'epoch': 1} {'type': 'loss', 'content': 0.123432956635952, 'timestamp': '2025-10-01 04:15:17.216510', 'step': 2142, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:15:17.254129', 'step': 2142, 'epoch': 1} {'type': 'loss', 'content': 0.2061193883419037, 'timestamp': '2025-10-01 04:15:17.256518', 'step': 2143, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:17.291226', 'step': 2143, 'epoch': 1} {'type': 'loss', 'content': 0.17279131710529327, 'timestamp': '2025-10-01 04:15:17.315023', 'step': 2144, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:17.348689', 'step': 2144, 'epoch': 1} {'type': 'loss', 'content': 0.15145951509475708, 'timestamp': '2025-10-01 04:15:17.351021', 'step': 2145, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:17.397192', 'step': 2145, 'epoch': 1} {'type': 'loss', 'content': 0.15422268211841583, 'timestamp': '2025-10-01 04:15:17.399571', 'step': 2146, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:17.433478', 'step': 2146, 'epoch': 1} {'type': 'loss', 'content': 0.13363787531852722, 'timestamp': '2025-10-01 04:15:17.435789', 'step': 2147, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:17.485799', 'step': 2147, 'epoch': 1} {'type': 'loss', 'content': 0.1625952422618866, 'timestamp': '2025-10-01 04:15:17.509573', 'step': 2148, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:17.546067', 'step': 2148, 'epoch': 1} {'type': 'loss', 'content': 0.14176851511001587, 'timestamp': '2025-10-01 04:15:17.548420', 'step': 2149, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:17.590310', 'step': 2149, 'epoch': 1} {'type': 'loss', 'content': 0.12118014693260193, 'timestamp': '2025-10-01 04:15:17.592888', 'step': 2150, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:17.642538', 'step': 2150, 'epoch': 1} {'type': 'loss', 'content': 0.2596787214279175, 'timestamp': '2025-10-01 04:15:17.645189', 'step': 2151, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:17.688238', 'step': 2151, 'epoch': 1} {'type': 'loss', 'content': 0.1569228172302246, 'timestamp': '2025-10-01 04:15:17.711991', 'step': 2152, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:15:17.749232', 'step': 2152, 'epoch': 1} {'type': 'loss', 'content': 0.09850460290908813, 'timestamp': '2025-10-01 04:15:17.751281', 'step': 2153, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:17.787926', 'step': 2153, 'epoch': 1} {'type': 'loss', 'content': 0.14985014498233795, 'timestamp': '2025-10-01 04:15:17.790048', 'step': 2154, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:17.828540', 'step': 2154, 'epoch': 1} {'type': 'loss', 'content': 0.16269870102405548, 'timestamp': '2025-10-01 04:15:17.830850', 'step': 2155, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:17.867895', 'step': 2155, 'epoch': 1} {'type': 'loss', 'content': 0.21383258700370789, 'timestamp': '2025-10-01 04:15:17.891483', 'step': 2156, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:17.930803', 'step': 2156, 'epoch': 1} {'type': 'loss', 'content': 0.2094728797674179, 'timestamp': '2025-10-01 04:15:17.933018', 'step': 2157, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:15:17.972857', 'step': 2157, 'epoch': 1} {'type': 'loss', 'content': 0.12844730913639069, 'timestamp': '2025-10-01 04:15:17.975309', 'step': 2158, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:18.011118', 'step': 2158, 'epoch': 1} {'type': 'loss', 'content': 0.24722519516944885, 'timestamp': '2025-10-01 04:15:18.013255', 'step': 2159, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:18.049136', 'step': 2159, 'epoch': 1} {'type': 'loss', 'content': 0.15791966021060944, 'timestamp': '2025-10-01 04:15:18.072851', 'step': 2160, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:18.108126', 'step': 2160, 'epoch': 1} {'type': 'loss', 'content': 0.16532045602798462, 'timestamp': '2025-10-01 04:15:18.110283', 'step': 2161, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:18.145586', 'step': 2161, 'epoch': 1} {'type': 'loss', 'content': 0.24897974729537964, 'timestamp': '2025-10-01 04:15:18.147868', 'step': 2162, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:18.191611', 'step': 2162, 'epoch': 1} {'type': 'loss', 'content': 0.21405088901519775, 'timestamp': '2025-10-01 04:15:18.193919', 'step': 2163, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:18.237846', 'step': 2163, 'epoch': 1} {'type': 'loss', 'content': 0.2000592052936554, 'timestamp': '2025-10-01 04:15:18.261390', 'step': 2164, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:18.306937', 'step': 2164, 'epoch': 1} {'type': 'loss', 'content': 0.1998538225889206, 'timestamp': '2025-10-01 04:15:18.309569', 'step': 2165, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:18.349051', 'step': 2165, 'epoch': 1} {'type': 'loss', 'content': 0.12010458111763, 'timestamp': '2025-10-01 04:15:18.350951', 'step': 2166, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:18.404383', 'step': 2166, 'epoch': 1} {'type': 'loss', 'content': 0.1645558476448059, 'timestamp': '2025-10-01 04:15:18.406642', 'step': 2167, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:18.440585', 'step': 2167, 'epoch': 1} {'type': 'loss', 'content': 0.27794674038887024, 'timestamp': '2025-10-01 04:15:18.464239', 'step': 2168, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:18.508894', 'step': 2168, 'epoch': 1} {'type': 'loss', 'content': 0.20243826508522034, 'timestamp': '2025-10-01 04:15:18.510912', 'step': 2169, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:15:18.544497', 'step': 2169, 'epoch': 1} {'type': 'loss', 'content': 0.1550266146659851, 'timestamp': '2025-10-01 04:15:18.546861', 'step': 2170, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:18.578919', 'step': 2170, 'epoch': 1} {'type': 'loss', 'content': 0.21330775320529938, 'timestamp': '2025-10-01 04:15:18.580999', 'step': 2171, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:18.614030', 'step': 2171, 'epoch': 1} {'type': 'loss', 'content': 0.270216703414917, 'timestamp': '2025-10-01 04:15:18.637593', 'step': 2172, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:18.685674', 'step': 2172, 'epoch': 1} {'type': 'loss', 'content': 0.21187548339366913, 'timestamp': '2025-10-01 04:15:18.687758', 'step': 2173, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:18.733235', 'step': 2173, 'epoch': 1} {'type': 'loss', 'content': 0.19880694150924683, 'timestamp': '2025-10-01 04:15:18.735255', 'step': 2174, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:18.779974', 'step': 2174, 'epoch': 1} {'type': 'loss', 'content': 0.2323286086320877, 'timestamp': '2025-10-01 04:15:18.782057', 'step': 2175, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:15:18.827455', 'step': 2175, 'epoch': 1} {'type': 'loss', 'content': 0.1728987842798233, 'timestamp': '2025-10-01 04:15:18.851334', 'step': 2176, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:18.892627', 'step': 2176, 'epoch': 1} {'type': 'loss', 'content': 0.1403234452009201, 'timestamp': '2025-10-01 04:15:18.894757', 'step': 2177, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:18.927244', 'step': 2177, 'epoch': 1} {'type': 'loss', 'content': 0.11272121965885162, 'timestamp': '2025-10-01 04:15:18.934962', 'step': 2178, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:18.992673', 'step': 2178, 'epoch': 1} {'type': 'loss', 'content': 0.1053573340177536, 'timestamp': '2025-10-01 04:15:18.994738', 'step': 2179, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:19.039363', 'step': 2179, 'epoch': 1} {'type': 'loss', 'content': 0.22301623225212097, 'timestamp': '2025-10-01 04:15:19.063003', 'step': 2180, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:19.106897', 'step': 2180, 'epoch': 1} {'type': 'loss', 'content': 0.15750642120838165, 'timestamp': '2025-10-01 04:15:19.108908', 'step': 2181, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:19.142658', 'step': 2181, 'epoch': 1} {'type': 'loss', 'content': 0.30200108885765076, 'timestamp': '2025-10-01 04:15:19.145310', 'step': 2182, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:19.177191', 'step': 2182, 'epoch': 1} {'type': 'loss', 'content': 0.12787967920303345, 'timestamp': '2025-10-01 04:15:19.179570', 'step': 2183, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:19.226175', 'step': 2183, 'epoch': 1} {'type': 'loss', 'content': 0.2431199848651886, 'timestamp': '2025-10-01 04:15:19.249694', 'step': 2184, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:19.296304', 'step': 2184, 'epoch': 1} {'type': 'loss', 'content': 0.2649703919887543, 'timestamp': '2025-10-01 04:15:19.302475', 'step': 2185, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:19.350472', 'step': 2185, 'epoch': 1} {'type': 'loss', 'content': 0.20360620319843292, 'timestamp': '2025-10-01 04:15:19.352648', 'step': 2186, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:19.404173', 'step': 2186, 'epoch': 1} {'type': 'loss', 'content': 0.1707490235567093, 'timestamp': '2025-10-01 04:15:19.407229', 'step': 2187, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:19.440511', 'step': 2187, 'epoch': 1} {'type': 'loss', 'content': 0.2391749918460846, 'timestamp': '2025-10-01 04:15:19.464077', 'step': 2188, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:19.501772', 'step': 2188, 'epoch': 1} {'type': 'loss', 'content': 0.17232120037078857, 'timestamp': '2025-10-01 04:15:19.503786', 'step': 2189, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:19.549251', 'step': 2189, 'epoch': 1} {'type': 'loss', 'content': 0.1541443020105362, 'timestamp': '2025-10-01 04:15:19.551450', 'step': 2190, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:15:19.585784', 'step': 2190, 'epoch': 1} {'type': 'loss', 'content': 0.16145482659339905, 'timestamp': '2025-10-01 04:15:19.596511', 'step': 2191, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:19.630264', 'step': 2191, 'epoch': 1} {'type': 'loss', 'content': 0.16297689080238342, 'timestamp': '2025-10-01 04:15:19.654961', 'step': 2192, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:19.702570', 'step': 2192, 'epoch': 1} {'type': 'loss', 'content': 0.16129301488399506, 'timestamp': '2025-10-01 04:15:19.704769', 'step': 2193, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:19.742587', 'step': 2193, 'epoch': 1} {'type': 'loss', 'content': 0.17533619701862335, 'timestamp': '2025-10-01 04:15:19.753527', 'step': 2194, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:19.786186', 'step': 2194, 'epoch': 1} {'type': 'loss', 'content': 0.20332148671150208, 'timestamp': '2025-10-01 04:15:19.788394', 'step': 2195, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:19.829479', 'step': 2195, 'epoch': 1} {'type': 'loss', 'content': 0.18533262610435486, 'timestamp': '2025-10-01 04:15:19.853048', 'step': 2196, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:19.896350', 'step': 2196, 'epoch': 1} {'type': 'loss', 'content': 0.13381221890449524, 'timestamp': '2025-10-01 04:15:19.898651', 'step': 2197, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:19.940819', 'step': 2197, 'epoch': 1} {'type': 'loss', 'content': 0.08260322362184525, 'timestamp': '2025-10-01 04:15:19.943196', 'step': 2198, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:19.986658', 'step': 2198, 'epoch': 1} {'type': 'loss', 'content': 0.1923752874135971, 'timestamp': '2025-10-01 04:15:19.988747', 'step': 2199, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:15:20.029377', 'step': 2199, 'epoch': 1} {'type': 'loss', 'content': 0.24599450826644897, 'timestamp': '2025-10-01 04:15:20.053263', 'step': 2200, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:20.097446', 'step': 2200, 'epoch': 1} {'type': 'loss', 'content': 0.11439421027898788, 'timestamp': '2025-10-01 04:15:20.099416', 'step': 2201, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-10-01 04:15:20.141331', 'step': 2201, 'epoch': 1} {'type': 'loss', 'content': 0.19397464394569397, 'timestamp': '2025-10-01 04:15:20.146080', 'step': 2202, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:20.179481', 'step': 2202, 'epoch': 1} {'type': 'loss', 'content': 0.1731613129377365, 'timestamp': '2025-10-01 04:15:20.181745', 'step': 2203, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:20.217112', 'step': 2203, 'epoch': 1} {'type': 'loss', 'content': 0.09346415102481842, 'timestamp': '2025-10-01 04:15:20.241668', 'step': 2204, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:20.278217', 'step': 2204, 'epoch': 1} {'type': 'loss', 'content': 0.11680689454078674, 'timestamp': '2025-10-01 04:15:20.280739', 'step': 2205, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:20.324858', 'step': 2205, 'epoch': 1} {'type': 'loss', 'content': 0.2795522212982178, 'timestamp': '2025-10-01 04:15:20.327079', 'step': 2206, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:20.368575', 'step': 2206, 'epoch': 1} {'type': 'loss', 'content': 0.1560380756855011, 'timestamp': '2025-10-01 04:15:20.371415', 'step': 2207, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:15:20.416585', 'step': 2207, 'epoch': 1} {'type': 'loss', 'content': 0.24840094149112701, 'timestamp': '2025-10-01 04:15:20.440393', 'step': 2208, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:20.475553', 'step': 2208, 'epoch': 1} {'type': 'loss', 'content': 0.12209979444742203, 'timestamp': '2025-10-01 04:15:20.477769', 'step': 2209, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:20.520003', 'step': 2209, 'epoch': 1} {'type': 'loss', 'content': 0.18665152788162231, 'timestamp': '2025-10-01 04:15:20.521971', 'step': 2210, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:20.555215', 'step': 2210, 'epoch': 1} {'type': 'loss', 'content': 0.3355270326137543, 'timestamp': '2025-10-01 04:15:20.557261', 'step': 2211, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:20.593059', 'step': 2211, 'epoch': 1} {'type': 'loss', 'content': 0.35015442967414856, 'timestamp': '2025-10-01 04:15:20.616902', 'step': 2212, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:20.650697', 'step': 2212, 'epoch': 1} {'type': 'loss', 'content': 0.08243024349212646, 'timestamp': '2025-10-01 04:15:20.652658', 'step': 2213, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:20.698349', 'step': 2213, 'epoch': 1} {'type': 'loss', 'content': 0.21264849603176117, 'timestamp': '2025-10-01 04:15:20.700388', 'step': 2214, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:15:20.746217', 'step': 2214, 'epoch': 1} {'type': 'loss', 'content': 0.18019522726535797, 'timestamp': '2025-10-01 04:15:20.748716', 'step': 2215, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:20.786376', 'step': 2215, 'epoch': 1} {'type': 'loss', 'content': 0.22089233994483948, 'timestamp': '2025-10-01 04:15:20.809934', 'step': 2216, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:20.847267', 'step': 2216, 'epoch': 1} {'type': 'loss', 'content': 0.10179508477449417, 'timestamp': '2025-10-01 04:15:20.858145', 'step': 2217, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:20.892257', 'step': 2217, 'epoch': 1} {'type': 'loss', 'content': 0.24581710994243622, 'timestamp': '2025-10-01 04:15:20.894280', 'step': 2218, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:20.935150', 'step': 2218, 'epoch': 1} {'type': 'loss', 'content': 0.13626626133918762, 'timestamp': '2025-10-01 04:15:20.938095', 'step': 2219, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:15:20.975610', 'step': 2219, 'epoch': 1} {'type': 'loss', 'content': 0.16141186654567719, 'timestamp': '2025-10-01 04:15:21.000967', 'step': 2220, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:21.041257', 'step': 2220, 'epoch': 1} {'type': 'loss', 'content': 0.17923510074615479, 'timestamp': '2025-10-01 04:15:21.043289', 'step': 2221, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:21.079675', 'step': 2221, 'epoch': 1} {'type': 'loss', 'content': 0.1254863440990448, 'timestamp': '2025-10-01 04:15:21.081802', 'step': 2222, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:21.114466', 'step': 2222, 'epoch': 1} {'type': 'loss', 'content': 0.2414892017841339, 'timestamp': '2025-10-01 04:15:21.116559', 'step': 2223, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:21.161628', 'step': 2223, 'epoch': 1} {'type': 'loss', 'content': 0.22294560074806213, 'timestamp': '2025-10-01 04:15:21.185266', 'step': 2224, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:21.219881', 'step': 2224, 'epoch': 1} {'type': 'loss', 'content': 0.18813437223434448, 'timestamp': '2025-10-01 04:15:21.222135', 'step': 2225, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:21.267103', 'step': 2225, 'epoch': 1} {'type': 'loss', 'content': 0.22120749950408936, 'timestamp': '2025-10-01 04:15:21.269278', 'step': 2226, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:21.303880', 'step': 2226, 'epoch': 1} {'type': 'loss', 'content': 0.2129669487476349, 'timestamp': '2025-10-01 04:15:21.311271', 'step': 2227, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:15:21.344609', 'step': 2227, 'epoch': 1} {'type': 'loss', 'content': 0.13099202513694763, 'timestamp': '2025-10-01 04:15:21.370345', 'step': 2228, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:15:21.403589', 'step': 2228, 'epoch': 1} {'type': 'loss', 'content': 0.17541815340518951, 'timestamp': '2025-10-01 04:15:21.406514', 'step': 2229, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:21.440430', 'step': 2229, 'epoch': 1} {'type': 'loss', 'content': 0.19921953976154327, 'timestamp': '2025-10-01 04:15:21.442550', 'step': 2230, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:21.476944', 'step': 2230, 'epoch': 1} {'type': 'loss', 'content': 0.11897215992212296, 'timestamp': '2025-10-01 04:15:21.480011', 'step': 2231, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:21.521913', 'step': 2231, 'epoch': 1} {'type': 'loss', 'content': 0.15704312920570374, 'timestamp': '2025-10-01 04:15:21.545547', 'step': 2232, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:21.578679', 'step': 2232, 'epoch': 1} {'type': 'loss', 'content': 0.08542149513959885, 'timestamp': '2025-10-01 04:15:21.581161', 'step': 2233, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:21.623514', 'step': 2233, 'epoch': 1} {'type': 'loss', 'content': 0.18877877295017242, 'timestamp': '2025-10-01 04:15:21.626311', 'step': 2234, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:21.660243', 'step': 2234, 'epoch': 1} {'type': 'loss', 'content': 0.20244696736335754, 'timestamp': '2025-10-01 04:15:21.677020', 'step': 2235, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:21.718269', 'step': 2235, 'epoch': 1} {'type': 'loss', 'content': 0.17412742972373962, 'timestamp': '2025-10-01 04:15:21.741770', 'step': 2236, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:21.776675', 'step': 2236, 'epoch': 1} {'type': 'loss', 'content': 0.32812249660491943, 'timestamp': '2025-10-01 04:15:21.778779', 'step': 2237, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:21.811519', 'step': 2237, 'epoch': 1} {'type': 'loss', 'content': 0.1493579000234604, 'timestamp': '2025-10-01 04:15:21.813634', 'step': 2238, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:21.856303', 'step': 2238, 'epoch': 1} {'type': 'loss', 'content': 0.16479884088039398, 'timestamp': '2025-10-01 04:15:21.873494', 'step': 2239, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:21.907327', 'step': 2239, 'epoch': 1} {'type': 'loss', 'content': 0.10337360948324203, 'timestamp': '2025-10-01 04:15:21.931509', 'step': 2240, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:21.966494', 'step': 2240, 'epoch': 1} {'type': 'loss', 'content': 0.2319299578666687, 'timestamp': '2025-10-01 04:15:21.969884', 'step': 2241, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:22.004265', 'step': 2241, 'epoch': 1} {'type': 'loss', 'content': 0.1799836903810501, 'timestamp': '2025-10-01 04:15:22.006243', 'step': 2242, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:15:22.038960', 'step': 2242, 'epoch': 1} {'type': 'loss', 'content': 0.2344674915075302, 'timestamp': '2025-10-01 04:15:22.041708', 'step': 2243, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:15:22.077182', 'step': 2243, 'epoch': 1} {'type': 'loss', 'content': 0.11439529061317444, 'timestamp': '2025-10-01 04:15:22.100770', 'step': 2244, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:22.134326', 'step': 2244, 'epoch': 1} {'type': 'loss', 'content': 0.2291751205921173, 'timestamp': '2025-10-01 04:15:22.136400', 'step': 2245, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:15:22.179708', 'step': 2245, 'epoch': 1} {'type': 'loss', 'content': 0.15762276947498322, 'timestamp': '2025-10-01 04:15:22.182625', 'step': 2246, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:15:22.226263', 'step': 2246, 'epoch': 1} {'type': 'loss', 'content': 0.23751428723335266, 'timestamp': '2025-10-01 04:15:22.228726', 'step': 2247, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:15:22.262531', 'step': 2247, 'epoch': 1} {'type': 'loss', 'content': 0.1615707278251648, 'timestamp': '2025-10-01 04:15:22.286154', 'step': 2248, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:22.321109', 'step': 2248, 'epoch': 1} {'type': 'loss', 'content': 0.14737996459007263, 'timestamp': '2025-10-01 04:15:22.324467', 'step': 2249, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:15:22.368917', 'step': 2249, 'epoch': 1} {'type': 'loss', 'content': 0.17973478138446808, 'timestamp': '2025-10-01 04:15:22.371334', 'step': 2250, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:15:22.403372', 'step': 2250, 'epoch': 1} {'type': 'loss', 'content': 0.13247404992580414, 'timestamp': '2025-10-01 04:15:22.406149', 'step': 2251, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:22.439543', 'step': 2251, 'epoch': 1} {'type': 'loss', 'content': 0.21493154764175415, 'timestamp': '2025-10-01 04:15:22.463139', 'step': 2252, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:22.503953', 'step': 2252, 'epoch': 1} {'type': 'loss', 'content': 0.1687939316034317, 'timestamp': '2025-10-01 04:15:22.505968', 'step': 2253, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:22.539931', 'step': 2253, 'epoch': 1} {'type': 'loss', 'content': 0.15303048491477966, 'timestamp': '2025-10-01 04:15:22.541888', 'step': 2254, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:15:22.582701', 'step': 2254, 'epoch': 1} {'type': 'loss', 'content': 0.18861034512519836, 'timestamp': '2025-10-01 04:15:22.587146', 'step': 2255, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:22.620462', 'step': 2255, 'epoch': 1} {'type': 'loss', 'content': 0.1472984105348587, 'timestamp': '2025-10-01 04:15:22.643990', 'step': 2256, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:22.695043', 'step': 2256, 'epoch': 1} {'type': 'loss', 'content': 0.20594297349452972, 'timestamp': '2025-10-01 04:15:22.696930', 'step': 2257, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:22.739749', 'step': 2257, 'epoch': 1} {'type': 'loss', 'content': 0.10423895716667175, 'timestamp': '2025-10-01 04:15:22.741858', 'step': 2258, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:22.777458', 'step': 2258, 'epoch': 1} {'type': 'loss', 'content': 0.25773856043815613, 'timestamp': '2025-10-01 04:15:22.779954', 'step': 2259, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:22.812387', 'step': 2259, 'epoch': 1} {'type': 'loss', 'content': 0.16294392943382263, 'timestamp': '2025-10-01 04:15:22.835945', 'step': 2260, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:22.879592', 'step': 2260, 'epoch': 1} {'type': 'loss', 'content': 0.16314363479614258, 'timestamp': '2025-10-01 04:15:22.881406', 'step': 2261, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:22.913465', 'step': 2261, 'epoch': 1} {'type': 'loss', 'content': 0.21780458092689514, 'timestamp': '2025-10-01 04:15:22.915534', 'step': 2262, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:22.970446', 'step': 2262, 'epoch': 1} {'type': 'loss', 'content': 0.1543480008840561, 'timestamp': '2025-10-01 04:15:22.972876', 'step': 2263, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:23.015919', 'step': 2263, 'epoch': 1} {'type': 'loss', 'content': 0.1366526335477829, 'timestamp': '2025-10-01 04:15:23.048576', 'step': 2264, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:23.082113', 'step': 2264, 'epoch': 1} {'type': 'loss', 'content': 0.13657480478286743, 'timestamp': '2025-10-01 04:15:23.084074', 'step': 2265, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:23.117329', 'step': 2265, 'epoch': 1} {'type': 'loss', 'content': 0.15474465489387512, 'timestamp': '2025-10-01 04:15:23.119972', 'step': 2266, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:23.152856', 'step': 2266, 'epoch': 1} {'type': 'loss', 'content': 0.2087017148733139, 'timestamp': '2025-10-01 04:15:23.155024', 'step': 2267, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:23.196928', 'step': 2267, 'epoch': 1} {'type': 'loss', 'content': 0.12147343158721924, 'timestamp': '2025-10-01 04:15:23.220421', 'step': 2268, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:23.253692', 'step': 2268, 'epoch': 1} {'type': 'loss', 'content': 0.28574037551879883, 'timestamp': '2025-10-01 04:15:23.255585', 'step': 2269, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:23.305008', 'step': 2269, 'epoch': 1} {'type': 'loss', 'content': 0.21741950511932373, 'timestamp': '2025-10-01 04:15:23.307845', 'step': 2270, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:23.347920', 'step': 2270, 'epoch': 1} {'type': 'loss', 'content': 0.16479676961898804, 'timestamp': '2025-10-01 04:15:23.350248', 'step': 2271, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:23.385566', 'step': 2271, 'epoch': 1} {'type': 'loss', 'content': 0.1410709023475647, 'timestamp': '2025-10-01 04:15:23.409258', 'step': 2272, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:23.442579', 'step': 2272, 'epoch': 1} {'type': 'loss', 'content': 0.254820317029953, 'timestamp': '2025-10-01 04:15:23.444548', 'step': 2273, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:23.487196', 'step': 2273, 'epoch': 1} {'type': 'loss', 'content': 0.13546231389045715, 'timestamp': '2025-10-01 04:15:23.489231', 'step': 2274, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:23.533280', 'step': 2274, 'epoch': 1} {'type': 'loss', 'content': 0.23598451912403107, 'timestamp': '2025-10-01 04:15:23.535767', 'step': 2275, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:23.574187', 'step': 2275, 'epoch': 1} {'type': 'loss', 'content': 0.20559459924697876, 'timestamp': '2025-10-01 04:15:23.597743', 'step': 2276, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:23.632068', 'step': 2276, 'epoch': 1} {'type': 'loss', 'content': 0.19937916100025177, 'timestamp': '2025-10-01 04:15:23.634163', 'step': 2277, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:23.666822', 'step': 2277, 'epoch': 1} {'type': 'loss', 'content': 0.24125060439109802, 'timestamp': '2025-10-01 04:15:23.669082', 'step': 2278, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:23.702686', 'step': 2278, 'epoch': 1} {'type': 'loss', 'content': 0.24210315942764282, 'timestamp': '2025-10-01 04:15:23.704769', 'step': 2279, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:23.747453', 'step': 2279, 'epoch': 1} {'type': 'loss', 'content': 0.18434228003025055, 'timestamp': '2025-10-01 04:15:23.771085', 'step': 2280, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:23.804761', 'step': 2280, 'epoch': 1} {'type': 'loss', 'content': 0.25453662872314453, 'timestamp': '2025-10-01 04:15:23.806866', 'step': 2281, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:23.839687', 'step': 2281, 'epoch': 1} {'type': 'loss', 'content': 0.1958940476179123, 'timestamp': '2025-10-01 04:15:23.841907', 'step': 2282, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:23.875083', 'step': 2282, 'epoch': 1} {'type': 'loss', 'content': 0.17659586668014526, 'timestamp': '2025-10-01 04:15:23.876904', 'step': 2283, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:23.919518', 'step': 2283, 'epoch': 1} {'type': 'loss', 'content': 0.20441965758800507, 'timestamp': '2025-10-01 04:15:23.943078', 'step': 2284, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:23.987345', 'step': 2284, 'epoch': 1} {'type': 'loss', 'content': 0.1124190017580986, 'timestamp': '2025-10-01 04:15:23.989308', 'step': 2285, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:15:24.031279', 'step': 2285, 'epoch': 1} {'type': 'loss', 'content': 0.11634773761034012, 'timestamp': '2025-10-01 04:15:24.033587', 'step': 2286, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:24.066791', 'step': 2286, 'epoch': 1} {'type': 'loss', 'content': 0.14484058320522308, 'timestamp': '2025-10-01 04:15:24.068840', 'step': 2287, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:24.101847', 'step': 2287, 'epoch': 1} {'type': 'loss', 'content': 0.17082194983959198, 'timestamp': '2025-10-01 04:15:24.125698', 'step': 2288, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:15:24.159046', 'step': 2288, 'epoch': 1} {'type': 'loss', 'content': 0.2489795982837677, 'timestamp': '2025-10-01 04:15:24.168882', 'step': 2289, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:15:24.202049', 'step': 2289, 'epoch': 1} {'type': 'loss', 'content': 0.20433470606803894, 'timestamp': '2025-10-01 04:15:24.217650', 'step': 2290, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:24.255949', 'step': 2290, 'epoch': 1} {'type': 'loss', 'content': 0.22963429987430573, 'timestamp': '2025-10-01 04:15:24.257868', 'step': 2291, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:24.292336', 'step': 2291, 'epoch': 1} {'type': 'loss', 'content': 0.16742421686649323, 'timestamp': '2025-10-01 04:15:24.317846', 'step': 2292, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:24.349841', 'step': 2292, 'epoch': 1} {'type': 'loss', 'content': 0.1953457146883011, 'timestamp': '2025-10-01 04:15:24.353052', 'step': 2293, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:24.393510', 'step': 2293, 'epoch': 1} {'type': 'loss', 'content': 0.18970707058906555, 'timestamp': '2025-10-01 04:15:24.395572', 'step': 2294, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:24.428028', 'step': 2294, 'epoch': 1} {'type': 'loss', 'content': 0.24897365272045135, 'timestamp': '2025-10-01 04:15:24.429953', 'step': 2295, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:24.469699', 'step': 2295, 'epoch': 1} {'type': 'loss', 'content': 0.14305050671100616, 'timestamp': '2025-10-01 04:15:24.493385', 'step': 2296, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:24.526405', 'step': 2296, 'epoch': 1} {'type': 'loss', 'content': 0.1988028734922409, 'timestamp': '2025-10-01 04:15:24.528447', 'step': 2297, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:15:24.559580', 'step': 2297, 'epoch': 1} {'type': 'loss', 'content': 0.10851690918207169, 'timestamp': '2025-10-01 04:15:24.561892', 'step': 2298, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:24.593021', 'step': 2298, 'epoch': 1} {'type': 'loss', 'content': 0.17823706567287445, 'timestamp': '2025-10-01 04:15:24.594838', 'step': 2299, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:24.626225', 'step': 2299, 'epoch': 1} {'type': 'loss', 'content': 0.18546481430530548, 'timestamp': '2025-10-01 04:15:24.649790', 'step': 2300, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:24.682162', 'step': 2300, 'epoch': 1} {'type': 'loss', 'content': 0.14574022591114044, 'timestamp': '2025-10-01 04:15:24.691064', 'step': 2301, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:24.728392', 'step': 2301, 'epoch': 1} {'type': 'loss', 'content': 0.14537298679351807, 'timestamp': '2025-10-01 04:15:24.730604', 'step': 2302, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:24.773894', 'step': 2302, 'epoch': 1} {'type': 'loss', 'content': 0.23249725997447968, 'timestamp': '2025-10-01 04:15:24.776190', 'step': 2303, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:24.826084', 'step': 2303, 'epoch': 1} {'type': 'loss', 'content': 0.18850275874137878, 'timestamp': '2025-10-01 04:15:24.849782', 'step': 2304, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:24.908170', 'step': 2304, 'epoch': 1} {'type': 'loss', 'content': 0.17821206152439117, 'timestamp': '2025-10-01 04:15:24.910337', 'step': 2305, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:24.944059', 'step': 2305, 'epoch': 1} {'type': 'loss', 'content': 0.21877652406692505, 'timestamp': '2025-10-01 04:15:24.945983', 'step': 2306, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:24.985026', 'step': 2306, 'epoch': 1} {'type': 'loss', 'content': 0.1232093870639801, 'timestamp': '2025-10-01 04:15:24.987643', 'step': 2307, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:25.023622', 'step': 2307, 'epoch': 1} {'type': 'loss', 'content': 0.35803496837615967, 'timestamp': '2025-10-01 04:15:25.047706', 'step': 2308, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:15:25.092855', 'step': 2308, 'epoch': 1} {'type': 'loss', 'content': 0.22086697816848755, 'timestamp': '2025-10-01 04:15:25.095344', 'step': 2309, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:25.133249', 'step': 2309, 'epoch': 1} {'type': 'loss', 'content': 0.2202014923095703, 'timestamp': '2025-10-01 04:15:25.135209', 'step': 2310, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:25.177634', 'step': 2310, 'epoch': 1} {'type': 'loss', 'content': 0.10840144008398056, 'timestamp': '2025-10-01 04:15:25.181209', 'step': 2311, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:25.232994', 'step': 2311, 'epoch': 1} {'type': 'loss', 'content': 0.16679252684116364, 'timestamp': '2025-10-01 04:15:25.257328', 'step': 2312, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:25.308682', 'step': 2312, 'epoch': 1} {'type': 'loss', 'content': 0.13888683915138245, 'timestamp': '2025-10-01 04:15:25.311195', 'step': 2313, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:25.358463', 'step': 2313, 'epoch': 1} {'type': 'loss', 'content': 0.11970308423042297, 'timestamp': '2025-10-01 04:15:25.360884', 'step': 2314, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:15:25.399853', 'step': 2314, 'epoch': 1} {'type': 'loss', 'content': 0.11769115179777145, 'timestamp': '2025-10-01 04:15:25.402194', 'step': 2315, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:15:25.447858', 'step': 2315, 'epoch': 1} {'type': 'loss', 'content': 0.10830667614936829, 'timestamp': '2025-10-01 04:15:25.471949', 'step': 2316, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:15:25.505920', 'step': 2316, 'epoch': 1} {'type': 'loss', 'content': 0.12005024403333664, 'timestamp': '2025-10-01 04:15:25.509233', 'step': 2317, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:25.561745', 'step': 2317, 'epoch': 1} {'type': 'loss', 'content': 0.11127268522977829, 'timestamp': '2025-10-01 04:15:25.576334', 'step': 2318, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:25.616412', 'step': 2318, 'epoch': 1} {'type': 'loss', 'content': 0.18936826288700104, 'timestamp': '2025-10-01 04:15:25.623765', 'step': 2319, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-10-01 04:15:25.663113', 'step': 2319, 'epoch': 1} {'type': 'loss', 'content': 0.13281802833080292, 'timestamp': '2025-10-01 04:15:25.692699', 'step': 2320, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:25.726359', 'step': 2320, 'epoch': 1} {'type': 'loss', 'content': 0.2555234134197235, 'timestamp': '2025-10-01 04:15:25.736568', 'step': 2321, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:25.783628', 'step': 2321, 'epoch': 1} {'type': 'loss', 'content': 0.18642164766788483, 'timestamp': '2025-10-01 04:15:25.786644', 'step': 2322, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:25.828923', 'step': 2322, 'epoch': 1} {'type': 'loss', 'content': 0.16450735926628113, 'timestamp': '2025-10-01 04:15:25.831317', 'step': 2323, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:25.875815', 'step': 2323, 'epoch': 1} {'type': 'loss', 'content': 0.21519812941551208, 'timestamp': '2025-10-01 04:15:25.910762', 'step': 2324, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:15:25.943684', 'step': 2324, 'epoch': 1} {'type': 'loss', 'content': 0.2336832582950592, 'timestamp': '2025-10-01 04:15:25.952681', 'step': 2325, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:25.994389', 'step': 2325, 'epoch': 1} {'type': 'loss', 'content': 0.12351681292057037, 'timestamp': '2025-10-01 04:15:26.001055', 'step': 2326, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:26.035596', 'step': 2326, 'epoch': 1} {'type': 'loss', 'content': 0.1810775250196457, 'timestamp': '2025-10-01 04:15:26.041290', 'step': 2327, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:15:26.080050', 'step': 2327, 'epoch': 1} {'type': 'loss', 'content': 0.1179811954498291, 'timestamp': '2025-10-01 04:15:26.111520', 'step': 2328, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:26.148692', 'step': 2328, 'epoch': 1} {'type': 'loss', 'content': 0.14578025043010712, 'timestamp': '2025-10-01 04:15:26.154086', 'step': 2329, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:26.201255', 'step': 2329, 'epoch': 1} {'type': 'loss', 'content': 0.14490164816379547, 'timestamp': '2025-10-01 04:15:26.212665', 'step': 2330, 'epoch': 1} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 949202279808}], 'timestamp': '2025-10-01 04:15:36.676872', 'step': 2330, 'epoch': 1} {'type': 'pplx', 'content': 7836.598315921713, 'timestamp': '2025-10-01 04:15:36.680725', 'step': 2330, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:36.712273', 'step': 2330, 'epoch': 1} {'type': 'loss', 'content': 0.17451970279216766, 'timestamp': '2025-10-01 04:15:36.714980', 'step': 2331, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:36.748070', 'step': 2331, 'epoch': 1} {'type': 'loss', 'content': 0.23357608914375305, 'timestamp': '2025-10-01 04:15:36.772218', 'step': 2332, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:36.805757', 'step': 2332, 'epoch': 1} {'type': 'loss', 'content': 0.13701552152633667, 'timestamp': '2025-10-01 04:15:36.807981', 'step': 2333, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:15:36.848523', 'step': 2333, 'epoch': 1} {'type': 'loss', 'content': 0.18525990843772888, 'timestamp': '2025-10-01 04:15:36.850860', 'step': 2334, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:36.886030', 'step': 2334, 'epoch': 1} {'type': 'loss', 'content': 0.14231210947036743, 'timestamp': '2025-10-01 04:15:36.888055', 'step': 2335, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:36.923722', 'step': 2335, 'epoch': 1} {'type': 'loss', 'content': 0.17500190436840057, 'timestamp': '2025-10-01 04:15:36.947384', 'step': 2336, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:36.978346', 'step': 2336, 'epoch': 1} {'type': 'loss', 'content': 0.17500606179237366, 'timestamp': '2025-10-01 04:15:36.980384', 'step': 2337, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:15:37.013148', 'step': 2337, 'epoch': 1} {'type': 'loss', 'content': 0.10594551265239716, 'timestamp': '2025-10-01 04:15:37.017480', 'step': 2338, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:37.048921', 'step': 2338, 'epoch': 1} {'type': 'loss', 'content': 0.12623202800750732, 'timestamp': '2025-10-01 04:15:37.051317', 'step': 2339, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:37.085578', 'step': 2339, 'epoch': 1} {'type': 'loss', 'content': 0.20253902673721313, 'timestamp': '2025-10-01 04:15:37.109156', 'step': 2340, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:37.147156', 'step': 2340, 'epoch': 1} {'type': 'loss', 'content': 0.1998559683561325, 'timestamp': '2025-10-01 04:15:37.149003', 'step': 2341, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:37.182126', 'step': 2341, 'epoch': 1} {'type': 'loss', 'content': 0.14357061684131622, 'timestamp': '2025-10-01 04:15:37.184070', 'step': 2342, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:37.221542', 'step': 2342, 'epoch': 1} {'type': 'loss', 'content': 0.1822662055492401, 'timestamp': '2025-10-01 04:15:37.223447', 'step': 2343, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:37.255685', 'step': 2343, 'epoch': 1} {'type': 'loss', 'content': 0.1441260725259781, 'timestamp': '2025-10-01 04:15:37.279267', 'step': 2344, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:37.313413', 'step': 2344, 'epoch': 1} {'type': 'loss', 'content': 0.20914973318576813, 'timestamp': '2025-10-01 04:15:37.316699', 'step': 2345, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:37.351162', 'step': 2345, 'epoch': 1} {'type': 'loss', 'content': 0.1526007354259491, 'timestamp': '2025-10-01 04:15:37.353526', 'step': 2346, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:37.398596', 'step': 2346, 'epoch': 1} {'type': 'loss', 'content': 0.15538839995861053, 'timestamp': '2025-10-01 04:15:37.401004', 'step': 2347, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:37.448531', 'step': 2347, 'epoch': 1} {'type': 'loss', 'content': 0.15743882954120636, 'timestamp': '2025-10-01 04:15:37.477448', 'step': 2348, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:37.517565', 'step': 2348, 'epoch': 1} {'type': 'loss', 'content': 0.1828099936246872, 'timestamp': '2025-10-01 04:15:37.519625', 'step': 2349, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:37.554859', 'step': 2349, 'epoch': 1} {'type': 'loss', 'content': 0.15618398785591125, 'timestamp': '2025-10-01 04:15:37.557240', 'step': 2350, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:37.593084', 'step': 2350, 'epoch': 1} {'type': 'loss', 'content': 0.25413408875465393, 'timestamp': '2025-10-01 04:15:37.595158', 'step': 2351, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:37.642504', 'step': 2351, 'epoch': 1} {'type': 'loss', 'content': 0.07348048686981201, 'timestamp': '2025-10-01 04:15:37.666338', 'step': 2352, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:37.700306', 'step': 2352, 'epoch': 1} {'type': 'loss', 'content': 0.12974220514297485, 'timestamp': '2025-10-01 04:15:37.704629', 'step': 2353, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:15:37.738033', 'step': 2353, 'epoch': 1} {'type': 'loss', 'content': 0.08709164708852768, 'timestamp': '2025-10-01 04:15:37.739936', 'step': 2354, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:37.781118', 'step': 2354, 'epoch': 1} {'type': 'loss', 'content': 0.2341717928647995, 'timestamp': '2025-10-01 04:15:37.783249', 'step': 2355, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:37.817587', 'step': 2355, 'epoch': 1} {'type': 'loss', 'content': 0.15472841262817383, 'timestamp': '2025-10-01 04:15:37.841196', 'step': 2356, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:37.872759', 'step': 2356, 'epoch': 1} {'type': 'loss', 'content': 0.15843483805656433, 'timestamp': '2025-10-01 04:15:37.874937', 'step': 2357, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:37.907518', 'step': 2357, 'epoch': 1} {'type': 'loss', 'content': 0.12137801200151443, 'timestamp': '2025-10-01 04:15:37.909479', 'step': 2358, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:15:37.944413', 'step': 2358, 'epoch': 1} {'type': 'loss', 'content': 0.19092325866222382, 'timestamp': '2025-10-01 04:15:37.946446', 'step': 2359, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:37.978358', 'step': 2359, 'epoch': 1} {'type': 'loss', 'content': 0.20717822015285492, 'timestamp': '2025-10-01 04:15:38.002284', 'step': 2360, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:38.034975', 'step': 2360, 'epoch': 1} {'type': 'loss', 'content': 0.26314958930015564, 'timestamp': '2025-10-01 04:15:38.037127', 'step': 2361, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:38.074178', 'step': 2361, 'epoch': 1} {'type': 'loss', 'content': 0.19225460290908813, 'timestamp': '2025-10-01 04:15:38.076309', 'step': 2362, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:38.112161', 'step': 2362, 'epoch': 1} {'type': 'loss', 'content': 0.14516864717006683, 'timestamp': '2025-10-01 04:15:38.114178', 'step': 2363, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:15:38.147281', 'step': 2363, 'epoch': 1} {'type': 'loss', 'content': 0.35889068245887756, 'timestamp': '2025-10-01 04:15:38.172616', 'step': 2364, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:38.204234', 'step': 2364, 'epoch': 1} {'type': 'loss', 'content': 0.21728846430778503, 'timestamp': '2025-10-01 04:15:38.206218', 'step': 2365, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:38.237577', 'step': 2365, 'epoch': 1} {'type': 'loss', 'content': 0.27052274346351624, 'timestamp': '2025-10-01 04:15:38.239967', 'step': 2366, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:38.272374', 'step': 2366, 'epoch': 1} {'type': 'loss', 'content': 0.15183685719966888, 'timestamp': '2025-10-01 04:15:38.274496', 'step': 2367, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:38.306094', 'step': 2367, 'epoch': 1} {'type': 'loss', 'content': 0.2032712697982788, 'timestamp': '2025-10-01 04:15:38.330463', 'step': 2368, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:38.365232', 'step': 2368, 'epoch': 1} {'type': 'loss', 'content': 0.25574642419815063, 'timestamp': '2025-10-01 04:15:38.367409', 'step': 2369, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:38.400003', 'step': 2369, 'epoch': 1} {'type': 'loss', 'content': 0.15204721689224243, 'timestamp': '2025-10-01 04:15:38.402248', 'step': 2370, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:38.434040', 'step': 2370, 'epoch': 1} {'type': 'loss', 'content': 0.1502702385187149, 'timestamp': '2025-10-01 04:15:38.435995', 'step': 2371, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:38.470511', 'step': 2371, 'epoch': 1} {'type': 'loss', 'content': 0.12513330578804016, 'timestamp': '2025-10-01 04:15:38.494073', 'step': 2372, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:38.530462', 'step': 2372, 'epoch': 1} {'type': 'loss', 'content': 0.19964519143104553, 'timestamp': '2025-10-01 04:15:38.532525', 'step': 2373, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:38.565626', 'step': 2373, 'epoch': 1} {'type': 'loss', 'content': 0.20970776677131653, 'timestamp': '2025-10-01 04:15:38.567879', 'step': 2374, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:38.600220', 'step': 2374, 'epoch': 1} {'type': 'loss', 'content': 0.24794140458106995, 'timestamp': '2025-10-01 04:15:38.602404', 'step': 2375, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:38.633988', 'step': 2375, 'epoch': 1} {'type': 'loss', 'content': 0.18125870823860168, 'timestamp': '2025-10-01 04:15:38.657515', 'step': 2376, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:38.689866', 'step': 2376, 'epoch': 1} {'type': 'loss', 'content': 0.2719157636165619, 'timestamp': '2025-10-01 04:15:38.691943', 'step': 2377, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:38.723152', 'step': 2377, 'epoch': 1} {'type': 'loss', 'content': 0.23986007273197174, 'timestamp': '2025-10-01 04:15:38.725164', 'step': 2378, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:38.757001', 'step': 2378, 'epoch': 1} {'type': 'loss', 'content': 0.1419152021408081, 'timestamp': '2025-10-01 04:15:38.759049', 'step': 2379, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:38.790505', 'step': 2379, 'epoch': 1} {'type': 'loss', 'content': 0.15793025493621826, 'timestamp': '2025-10-01 04:15:38.814366', 'step': 2380, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:38.845689', 'step': 2380, 'epoch': 1} {'type': 'loss', 'content': 0.11560183763504028, 'timestamp': '2025-10-01 04:15:38.847848', 'step': 2381, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:15:38.878592', 'step': 2381, 'epoch': 1} {'type': 'loss', 'content': 0.21455299854278564, 'timestamp': '2025-10-01 04:15:38.880933', 'step': 2382, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:38.911952', 'step': 2382, 'epoch': 1} {'type': 'loss', 'content': 0.19390681385993958, 'timestamp': '2025-10-01 04:15:38.914015', 'step': 2383, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:15:38.945549', 'step': 2383, 'epoch': 1} {'type': 'loss', 'content': 0.3034583032131195, 'timestamp': '2025-10-01 04:15:38.969916', 'step': 2384, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:39.002280', 'step': 2384, 'epoch': 1} {'type': 'loss', 'content': 0.20126844942569733, 'timestamp': '2025-10-01 04:15:39.004312', 'step': 2385, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:15:39.036218', 'step': 2385, 'epoch': 1} {'type': 'loss', 'content': 0.21684643626213074, 'timestamp': '2025-10-01 04:15:39.038644', 'step': 2386, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:39.071834', 'step': 2386, 'epoch': 1} {'type': 'loss', 'content': 0.1708972007036209, 'timestamp': '2025-10-01 04:15:39.073901', 'step': 2387, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:39.108656', 'step': 2387, 'epoch': 1} {'type': 'loss', 'content': 0.1767585575580597, 'timestamp': '2025-10-01 04:15:39.132427', 'step': 2388, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:39.164207', 'step': 2388, 'epoch': 1} {'type': 'loss', 'content': 0.1906617432832718, 'timestamp': '2025-10-01 04:15:39.179845', 'step': 2389, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:39.216131', 'step': 2389, 'epoch': 1} {'type': 'loss', 'content': 0.21068266034126282, 'timestamp': '2025-10-01 04:15:39.218255', 'step': 2390, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:15:39.250868', 'step': 2390, 'epoch': 1} {'type': 'loss', 'content': 0.25968238711357117, 'timestamp': '2025-10-01 04:15:39.253392', 'step': 2391, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:39.286418', 'step': 2391, 'epoch': 1} {'type': 'loss', 'content': 0.20354269444942474, 'timestamp': '2025-10-01 04:15:39.310854', 'step': 2392, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:39.341947', 'step': 2392, 'epoch': 1} {'type': 'loss', 'content': 0.2544088363647461, 'timestamp': '2025-10-01 04:15:39.343971', 'step': 2393, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:39.375377', 'step': 2393, 'epoch': 1} {'type': 'loss', 'content': 0.11916790902614594, 'timestamp': '2025-10-01 04:15:39.377383', 'step': 2394, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:39.409132', 'step': 2394, 'epoch': 1} {'type': 'loss', 'content': 0.20490367710590363, 'timestamp': '2025-10-01 04:15:39.411136', 'step': 2395, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:39.448944', 'step': 2395, 'epoch': 1} {'type': 'loss', 'content': 0.20944973826408386, 'timestamp': '2025-10-01 04:15:39.472597', 'step': 2396, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:39.506826', 'step': 2396, 'epoch': 1} {'type': 'loss', 'content': 0.11087730526924133, 'timestamp': '2025-10-01 04:15:39.509028', 'step': 2397, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:39.541710', 'step': 2397, 'epoch': 1} {'type': 'loss', 'content': 0.13301169872283936, 'timestamp': '2025-10-01 04:15:39.543757', 'step': 2398, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:39.575467', 'step': 2398, 'epoch': 1} {'type': 'loss', 'content': 0.18855035305023193, 'timestamp': '2025-10-01 04:15:39.577572', 'step': 2399, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:15:39.607905', 'step': 2399, 'epoch': 1} {'type': 'loss', 'content': 0.16409814357757568, 'timestamp': '2025-10-01 04:15:39.634360', 'step': 2400, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:39.667315', 'step': 2400, 'epoch': 1} {'type': 'loss', 'content': 0.1797669231891632, 'timestamp': '2025-10-01 04:15:39.669347', 'step': 2401, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:39.703598', 'step': 2401, 'epoch': 1} {'type': 'loss', 'content': 0.12930423021316528, 'timestamp': '2025-10-01 04:15:39.705691', 'step': 2402, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:39.740500', 'step': 2402, 'epoch': 1} {'type': 'loss', 'content': 0.18532970547676086, 'timestamp': '2025-10-01 04:15:39.742602', 'step': 2403, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:39.774842', 'step': 2403, 'epoch': 1} {'type': 'loss', 'content': 0.12569916248321533, 'timestamp': '2025-10-01 04:15:39.799226', 'step': 2404, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:39.831987', 'step': 2404, 'epoch': 1} {'type': 'loss', 'content': 0.13719099760055542, 'timestamp': '2025-10-01 04:15:39.834045', 'step': 2405, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:39.868338', 'step': 2405, 'epoch': 1} {'type': 'loss', 'content': 0.1729978322982788, 'timestamp': '2025-10-01 04:15:39.871297', 'step': 2406, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:39.910276', 'step': 2406, 'epoch': 1} {'type': 'loss', 'content': 0.16125743091106415, 'timestamp': '2025-10-01 04:15:39.912441', 'step': 2407, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:15:39.951796', 'step': 2407, 'epoch': 1} {'type': 'loss', 'content': 0.1504976600408554, 'timestamp': '2025-10-01 04:15:39.975371', 'step': 2408, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:40.016028', 'step': 2408, 'epoch': 1} {'type': 'loss', 'content': 0.20836812257766724, 'timestamp': '2025-10-01 04:15:40.018363', 'step': 2409, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:15:40.059323', 'step': 2409, 'epoch': 1} {'type': 'loss', 'content': 0.2375473529100418, 'timestamp': '2025-10-01 04:15:40.061769', 'step': 2410, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:40.093403', 'step': 2410, 'epoch': 1} {'type': 'loss', 'content': 0.2994827628135681, 'timestamp': '2025-10-01 04:15:40.095640', 'step': 2411, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:40.131119', 'step': 2411, 'epoch': 1} {'type': 'loss', 'content': 0.23199860751628876, 'timestamp': '2025-10-01 04:15:40.154616', 'step': 2412, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:40.187983', 'step': 2412, 'epoch': 1} {'type': 'loss', 'content': 0.20482416450977325, 'timestamp': '2025-10-01 04:15:40.190078', 'step': 2413, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:40.226167', 'step': 2413, 'epoch': 1} {'type': 'loss', 'content': 0.20751281082630157, 'timestamp': '2025-10-01 04:15:40.228349', 'step': 2414, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:40.260627', 'step': 2414, 'epoch': 1} {'type': 'loss', 'content': 0.2596879303455353, 'timestamp': '2025-10-01 04:15:40.262677', 'step': 2415, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:15:40.304713', 'step': 2415, 'epoch': 1} {'type': 'loss', 'content': 0.26726385951042175, 'timestamp': '2025-10-01 04:15:40.328485', 'step': 2416, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:15:40.359551', 'step': 2416, 'epoch': 1} {'type': 'loss', 'content': 0.1601865589618683, 'timestamp': '2025-10-01 04:15:40.361694', 'step': 2417, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:40.395223', 'step': 2417, 'epoch': 1} {'type': 'loss', 'content': 0.2762194871902466, 'timestamp': '2025-10-01 04:15:40.397134', 'step': 2418, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:40.434950', 'step': 2418, 'epoch': 1} {'type': 'loss', 'content': 0.16572609543800354, 'timestamp': '2025-10-01 04:15:40.437199', 'step': 2419, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:40.468176', 'step': 2419, 'epoch': 1} {'type': 'loss', 'content': 0.16964782774448395, 'timestamp': '2025-10-01 04:15:40.508432', 'step': 2420, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:15:40.555458', 'step': 2420, 'epoch': 1} {'type': 'loss', 'content': 0.16821913421154022, 'timestamp': '2025-10-01 04:15:40.557820', 'step': 2421, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:40.588788', 'step': 2421, 'epoch': 1} {'type': 'loss', 'content': 0.2430645227432251, 'timestamp': '2025-10-01 04:15:40.590779', 'step': 2422, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:15:40.626306', 'step': 2422, 'epoch': 1} {'type': 'loss', 'content': 0.2003178596496582, 'timestamp': '2025-10-01 04:15:40.635995', 'step': 2423, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:40.673577', 'step': 2423, 'epoch': 1} {'type': 'loss', 'content': 0.1263308823108673, 'timestamp': '2025-10-01 04:15:40.697984', 'step': 2424, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:40.729679', 'step': 2424, 'epoch': 1} {'type': 'loss', 'content': 0.18543967604637146, 'timestamp': '2025-10-01 04:15:40.731663', 'step': 2425, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:40.762937', 'step': 2425, 'epoch': 1} {'type': 'loss', 'content': 0.1789843887090683, 'timestamp': '2025-10-01 04:15:40.765231', 'step': 2426, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:40.797000', 'step': 2426, 'epoch': 1} {'type': 'loss', 'content': 0.1502709686756134, 'timestamp': '2025-10-01 04:15:40.799202', 'step': 2427, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:40.839670', 'step': 2427, 'epoch': 1} {'type': 'loss', 'content': 0.25517919659614563, 'timestamp': '2025-10-01 04:15:40.863294', 'step': 2428, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:40.903921', 'step': 2428, 'epoch': 1} {'type': 'loss', 'content': 0.13654376566410065, 'timestamp': '2025-10-01 04:15:40.906746', 'step': 2429, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:15:40.940962', 'step': 2429, 'epoch': 1} {'type': 'loss', 'content': 0.12411294132471085, 'timestamp': '2025-10-01 04:15:40.943644', 'step': 2430, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:40.975545', 'step': 2430, 'epoch': 1} {'type': 'loss', 'content': 0.20338168740272522, 'timestamp': '2025-10-01 04:15:40.978250', 'step': 2431, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:41.010095', 'step': 2431, 'epoch': 1} {'type': 'loss', 'content': 0.18025340139865875, 'timestamp': '2025-10-01 04:15:41.038273', 'step': 2432, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:41.079462', 'step': 2432, 'epoch': 1} {'type': 'loss', 'content': 0.17991521954536438, 'timestamp': '2025-10-01 04:15:41.081712', 'step': 2433, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:41.113366', 'step': 2433, 'epoch': 1} {'type': 'loss', 'content': 0.13167735934257507, 'timestamp': '2025-10-01 04:15:41.115325', 'step': 2434, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:41.149868', 'step': 2434, 'epoch': 1} {'type': 'loss', 'content': 0.24086090922355652, 'timestamp': '2025-10-01 04:15:41.152639', 'step': 2435, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:41.191202', 'step': 2435, 'epoch': 1} {'type': 'loss', 'content': 0.11652620881795883, 'timestamp': '2025-10-01 04:15:41.214864', 'step': 2436, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:41.253712', 'step': 2436, 'epoch': 1} {'type': 'loss', 'content': 0.13201160728931427, 'timestamp': '2025-10-01 04:15:41.255894', 'step': 2437, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:41.289418', 'step': 2437, 'epoch': 1} {'type': 'loss', 'content': 0.15160037577152252, 'timestamp': '2025-10-01 04:15:41.291664', 'step': 2438, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:41.323419', 'step': 2438, 'epoch': 1} {'type': 'loss', 'content': 0.1605037897825241, 'timestamp': '2025-10-01 04:15:41.326339', 'step': 2439, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:41.357312', 'step': 2439, 'epoch': 1} {'type': 'loss', 'content': 0.17707908153533936, 'timestamp': '2025-10-01 04:15:41.380949', 'step': 2440, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:41.411877', 'step': 2440, 'epoch': 1} {'type': 'loss', 'content': 0.2953270375728607, 'timestamp': '2025-10-01 04:15:41.414443', 'step': 2441, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:41.445186', 'step': 2441, 'epoch': 1} {'type': 'loss', 'content': 0.20758217573165894, 'timestamp': '2025-10-01 04:15:41.447422', 'step': 2442, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:15:41.479190', 'step': 2442, 'epoch': 1} {'type': 'loss', 'content': 0.19772577285766602, 'timestamp': '2025-10-01 04:15:41.481350', 'step': 2443, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:41.512202', 'step': 2443, 'epoch': 1} {'type': 'loss', 'content': 0.21994467079639435, 'timestamp': '2025-10-01 04:15:41.535847', 'step': 2444, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:15:41.570429', 'step': 2444, 'epoch': 1} {'type': 'loss', 'content': 0.07092108577489853, 'timestamp': '2025-10-01 04:15:41.572919', 'step': 2445, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:41.604086', 'step': 2445, 'epoch': 1} {'type': 'loss', 'content': 0.16966842114925385, 'timestamp': '2025-10-01 04:15:41.606770', 'step': 2446, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:41.640915', 'step': 2446, 'epoch': 1} {'type': 'loss', 'content': 0.1318090856075287, 'timestamp': '2025-10-01 04:15:41.643073', 'step': 2447, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:41.687581', 'step': 2447, 'epoch': 1} {'type': 'loss', 'content': 0.17557179927825928, 'timestamp': '2025-10-01 04:15:41.723714', 'step': 2448, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:41.754335', 'step': 2448, 'epoch': 1} {'type': 'loss', 'content': 0.21280667185783386, 'timestamp': '2025-10-01 04:15:41.756511', 'step': 2449, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:41.796579', 'step': 2449, 'epoch': 1} {'type': 'loss', 'content': 0.17089347541332245, 'timestamp': '2025-10-01 04:15:41.800032', 'step': 2450, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:41.830864', 'step': 2450, 'epoch': 1} {'type': 'loss', 'content': 0.12513425946235657, 'timestamp': '2025-10-01 04:15:41.832730', 'step': 2451, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:41.863402', 'step': 2451, 'epoch': 1} {'type': 'loss', 'content': 0.23923012614250183, 'timestamp': '2025-10-01 04:15:41.886900', 'step': 2452, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:41.922933', 'step': 2452, 'epoch': 1} {'type': 'loss', 'content': 0.15871301293373108, 'timestamp': '2025-10-01 04:15:41.925172', 'step': 2453, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:41.956796', 'step': 2453, 'epoch': 1} {'type': 'loss', 'content': 0.14996609091758728, 'timestamp': '2025-10-01 04:15:41.958757', 'step': 2454, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:41.992148', 'step': 2454, 'epoch': 1} {'type': 'loss', 'content': 0.15446966886520386, 'timestamp': '2025-10-01 04:15:41.994227', 'step': 2455, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:15:42.027231', 'step': 2455, 'epoch': 1} {'type': 'loss', 'content': 0.16738761961460114, 'timestamp': '2025-10-01 04:15:42.050843', 'step': 2456, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:42.086892', 'step': 2456, 'epoch': 1} {'type': 'loss', 'content': 0.1727304756641388, 'timestamp': '2025-10-01 04:15:42.088988', 'step': 2457, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:42.120578', 'step': 2457, 'epoch': 1} {'type': 'loss', 'content': 0.15089945495128632, 'timestamp': '2025-10-01 04:15:42.122652', 'step': 2458, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:42.155280', 'step': 2458, 'epoch': 1} {'type': 'loss', 'content': 0.2474117875099182, 'timestamp': '2025-10-01 04:15:42.157216', 'step': 2459, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:42.187942', 'step': 2459, 'epoch': 1} {'type': 'loss', 'content': 0.1528051346540451, 'timestamp': '2025-10-01 04:15:42.211448', 'step': 2460, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:42.243315', 'step': 2460, 'epoch': 1} {'type': 'loss', 'content': 0.1894235610961914, 'timestamp': '2025-10-01 04:15:42.245681', 'step': 2461, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:42.281517', 'step': 2461, 'epoch': 1} {'type': 'loss', 'content': 0.17995209991931915, 'timestamp': '2025-10-01 04:15:42.283616', 'step': 2462, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:42.314681', 'step': 2462, 'epoch': 1} {'type': 'loss', 'content': 0.2573867738246918, 'timestamp': '2025-10-01 04:15:42.316770', 'step': 2463, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:42.348932', 'step': 2463, 'epoch': 1} {'type': 'loss', 'content': 0.21948358416557312, 'timestamp': '2025-10-01 04:15:42.372990', 'step': 2464, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:15:42.404980', 'step': 2464, 'epoch': 1} {'type': 'loss', 'content': 0.2385302484035492, 'timestamp': '2025-10-01 04:15:42.407023', 'step': 2465, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:42.439066', 'step': 2465, 'epoch': 1} {'type': 'loss', 'content': 0.17817789316177368, 'timestamp': '2025-10-01 04:15:42.441268', 'step': 2466, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:42.474207', 'step': 2466, 'epoch': 1} {'type': 'loss', 'content': 0.12333507090806961, 'timestamp': '2025-10-01 04:15:42.476229', 'step': 2467, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:15:42.508929', 'step': 2467, 'epoch': 1} {'type': 'loss', 'content': 0.20080773532390594, 'timestamp': '2025-10-01 04:15:42.532652', 'step': 2468, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:42.563686', 'step': 2468, 'epoch': 1} {'type': 'loss', 'content': 0.111115962266922, 'timestamp': '2025-10-01 04:15:42.565584', 'step': 2469, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:42.596623', 'step': 2469, 'epoch': 1} {'type': 'loss', 'content': 0.31998077034950256, 'timestamp': '2025-10-01 04:15:42.598745', 'step': 2470, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:42.629509', 'step': 2470, 'epoch': 1} {'type': 'loss', 'content': 0.15225958824157715, 'timestamp': '2025-10-01 04:15:42.631465', 'step': 2471, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:42.670552', 'step': 2471, 'epoch': 1} {'type': 'loss', 'content': 0.15878307819366455, 'timestamp': '2025-10-01 04:15:42.696346', 'step': 2472, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:42.726025', 'step': 2472, 'epoch': 1} {'type': 'loss', 'content': 0.18638373911380768, 'timestamp': '2025-10-01 04:15:42.728124', 'step': 2473, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:42.763253', 'step': 2473, 'epoch': 1} {'type': 'loss', 'content': 0.20722302794456482, 'timestamp': '2025-10-01 04:15:42.765337', 'step': 2474, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:15:42.795794', 'step': 2474, 'epoch': 1} {'type': 'loss', 'content': 0.11552417278289795, 'timestamp': '2025-10-01 04:15:42.798843', 'step': 2475, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:42.830975', 'step': 2475, 'epoch': 1} {'type': 'loss', 'content': 0.18516065180301666, 'timestamp': '2025-10-01 04:15:42.854865', 'step': 2476, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:42.886030', 'step': 2476, 'epoch': 1} {'type': 'loss', 'content': 0.23995421826839447, 'timestamp': '2025-10-01 04:15:42.888216', 'step': 2477, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:42.918789', 'step': 2477, 'epoch': 1} {'type': 'loss', 'content': 0.09441263973712921, 'timestamp': '2025-10-01 04:15:42.921150', 'step': 2478, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:42.951586', 'step': 2478, 'epoch': 1} {'type': 'loss', 'content': 0.15917521715164185, 'timestamp': '2025-10-01 04:15:42.953518', 'step': 2479, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:42.988946', 'step': 2479, 'epoch': 1} {'type': 'loss', 'content': 0.13259762525558472, 'timestamp': '2025-10-01 04:15:43.012518', 'step': 2480, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:15:43.056370', 'step': 2480, 'epoch': 1} {'type': 'loss', 'content': 0.2680654227733612, 'timestamp': '2025-10-01 04:15:43.058632', 'step': 2481, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:15:43.090845', 'step': 2481, 'epoch': 1} {'type': 'loss', 'content': 0.15439032018184662, 'timestamp': '2025-10-01 04:15:43.092850', 'step': 2482, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:43.126395', 'step': 2482, 'epoch': 1} {'type': 'loss', 'content': 0.22447943687438965, 'timestamp': '2025-10-01 04:15:43.128473', 'step': 2483, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:43.163205', 'step': 2483, 'epoch': 1} {'type': 'loss', 'content': 0.1870054006576538, 'timestamp': '2025-10-01 04:15:43.186730', 'step': 2484, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:43.218240', 'step': 2484, 'epoch': 1} {'type': 'loss', 'content': 0.2258862853050232, 'timestamp': '2025-10-01 04:15:43.220530', 'step': 2485, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:43.251911', 'step': 2485, 'epoch': 1} {'type': 'loss', 'content': 0.2359699010848999, 'timestamp': '2025-10-01 04:15:43.254162', 'step': 2486, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:43.300481', 'step': 2486, 'epoch': 1} {'type': 'loss', 'content': 0.12226773798465729, 'timestamp': '2025-10-01 04:15:43.303120', 'step': 2487, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:15:43.338456', 'step': 2487, 'epoch': 1} {'type': 'loss', 'content': 0.2819674611091614, 'timestamp': '2025-10-01 04:15:43.362407', 'step': 2488, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:43.395347', 'step': 2488, 'epoch': 1} {'type': 'loss', 'content': 0.18819092214107513, 'timestamp': '2025-10-01 04:15:43.397741', 'step': 2489, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:43.432474', 'step': 2489, 'epoch': 1} {'type': 'loss', 'content': 0.1467631757259369, 'timestamp': '2025-10-01 04:15:43.437226', 'step': 2490, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:43.480209', 'step': 2490, 'epoch': 1} {'type': 'loss', 'content': 0.20306611061096191, 'timestamp': '2025-10-01 04:15:43.482366', 'step': 2491, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:43.525624', 'step': 2491, 'epoch': 1} {'type': 'loss', 'content': 0.14866752922534943, 'timestamp': '2025-10-01 04:15:43.549415', 'step': 2492, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:43.581873', 'step': 2492, 'epoch': 1} {'type': 'loss', 'content': 0.13070951402187347, 'timestamp': '2025-10-01 04:15:43.583912', 'step': 2493, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:43.615221', 'step': 2493, 'epoch': 1} {'type': 'loss', 'content': 0.2974523901939392, 'timestamp': '2025-10-01 04:15:43.618357', 'step': 2494, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:43.657625', 'step': 2494, 'epoch': 1} {'type': 'loss', 'content': 0.2601483464241028, 'timestamp': '2025-10-01 04:15:43.659974', 'step': 2495, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:43.691997', 'step': 2495, 'epoch': 1} {'type': 'loss', 'content': 0.14073546230793, 'timestamp': '2025-10-01 04:15:43.716609', 'step': 2496, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:43.748530', 'step': 2496, 'epoch': 1} {'type': 'loss', 'content': 0.1309942901134491, 'timestamp': '2025-10-01 04:15:43.750449', 'step': 2497, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:15:43.781342', 'step': 2497, 'epoch': 1} {'type': 'loss', 'content': 0.20338492095470428, 'timestamp': '2025-10-01 04:15:43.784280', 'step': 2498, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:43.815827', 'step': 2498, 'epoch': 1} {'type': 'loss', 'content': 0.11349009722471237, 'timestamp': '2025-10-01 04:15:43.817975', 'step': 2499, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:43.855041', 'step': 2499, 'epoch': 1} {'type': 'loss', 'content': 0.21034039556980133, 'timestamp': '2025-10-01 04:15:43.878658', 'step': 2500, 'epoch': 1} {'type': 'info', 'content': 'Checkpoint saved at step 2500', 'timestamp': '2025-10-01 04:15:48.693584', 'step': 2500, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:48.730749', 'step': 2500, 'epoch': 1} {'type': 'loss', 'content': 0.16869428753852844, 'timestamp': '2025-10-01 04:15:48.733099', 'step': 2501, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:48.764775', 'step': 2501, 'epoch': 1} {'type': 'loss', 'content': 0.19771221280097961, 'timestamp': '2025-10-01 04:15:48.766737', 'step': 2502, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:48.797960', 'step': 2502, 'epoch': 1} {'type': 'loss', 'content': 0.11954645067453384, 'timestamp': '2025-10-01 04:15:48.800017', 'step': 2503, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:15:48.845388', 'step': 2503, 'epoch': 1} {'type': 'loss', 'content': 0.19405141472816467, 'timestamp': '2025-10-01 04:15:48.869458', 'step': 2504, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:48.901825', 'step': 2504, 'epoch': 1} {'type': 'loss', 'content': 0.21728017926216125, 'timestamp': '2025-10-01 04:15:48.904156', 'step': 2505, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:48.941531', 'step': 2505, 'epoch': 1} {'type': 'loss', 'content': 0.14714381098747253, 'timestamp': '2025-10-01 04:15:48.944127', 'step': 2506, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:48.976912', 'step': 2506, 'epoch': 1} {'type': 'loss', 'content': 0.17157915234565735, 'timestamp': '2025-10-01 04:15:48.979100', 'step': 2507, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:49.010535', 'step': 2507, 'epoch': 1} {'type': 'loss', 'content': 0.2851731479167938, 'timestamp': '2025-10-01 04:15:49.035384', 'step': 2508, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:49.066580', 'step': 2508, 'epoch': 1} {'type': 'loss', 'content': 0.10657354444265366, 'timestamp': '2025-10-01 04:15:49.068927', 'step': 2509, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:49.100522', 'step': 2509, 'epoch': 1} {'type': 'loss', 'content': 0.1614166498184204, 'timestamp': '2025-10-01 04:15:49.102501', 'step': 2510, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:49.157523', 'step': 2510, 'epoch': 1} {'type': 'loss', 'content': 0.18090184032917023, 'timestamp': '2025-10-01 04:15:49.160066', 'step': 2511, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:49.192023', 'step': 2511, 'epoch': 1} {'type': 'loss', 'content': 0.16258487105369568, 'timestamp': '2025-10-01 04:15:49.215552', 'step': 2512, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:49.249782', 'step': 2512, 'epoch': 1} {'type': 'loss', 'content': 0.29858070611953735, 'timestamp': '2025-10-01 04:15:49.253213', 'step': 2513, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:49.289234', 'step': 2513, 'epoch': 1} {'type': 'loss', 'content': 0.1570054292678833, 'timestamp': '2025-10-01 04:15:49.291458', 'step': 2514, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:49.322641', 'step': 2514, 'epoch': 1} {'type': 'loss', 'content': 0.17137418687343597, 'timestamp': '2025-10-01 04:15:49.324720', 'step': 2515, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:49.356594', 'step': 2515, 'epoch': 1} {'type': 'loss', 'content': 0.20337516069412231, 'timestamp': '2025-10-01 04:15:49.380917', 'step': 2516, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:49.412134', 'step': 2516, 'epoch': 1} {'type': 'loss', 'content': 0.20160086452960968, 'timestamp': '2025-10-01 04:15:49.414252', 'step': 2517, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:49.447940', 'step': 2517, 'epoch': 1} {'type': 'loss', 'content': 0.11143241822719574, 'timestamp': '2025-10-01 04:15:49.449988', 'step': 2518, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:15:49.481612', 'step': 2518, 'epoch': 1} {'type': 'loss', 'content': 0.1576574444770813, 'timestamp': '2025-10-01 04:15:49.484113', 'step': 2519, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:49.516588', 'step': 2519, 'epoch': 1} {'type': 'loss', 'content': 0.16835027933120728, 'timestamp': '2025-10-01 04:15:49.540407', 'step': 2520, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:49.573412', 'step': 2520, 'epoch': 1} {'type': 'loss', 'content': 0.3133558928966522, 'timestamp': '2025-10-01 04:15:49.575441', 'step': 2521, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:49.607313', 'step': 2521, 'epoch': 1} {'type': 'loss', 'content': 0.08635639399290085, 'timestamp': '2025-10-01 04:15:49.621165', 'step': 2522, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:49.664599', 'step': 2522, 'epoch': 1} {'type': 'loss', 'content': 0.1312379539012909, 'timestamp': '2025-10-01 04:15:49.678748', 'step': 2523, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:49.709324', 'step': 2523, 'epoch': 1} {'type': 'loss', 'content': 0.14274774491786957, 'timestamp': '2025-10-01 04:15:49.733034', 'step': 2524, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:49.773239', 'step': 2524, 'epoch': 1} {'type': 'loss', 'content': 0.242164745926857, 'timestamp': '2025-10-01 04:15:49.775378', 'step': 2525, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:49.806834', 'step': 2525, 'epoch': 1} {'type': 'loss', 'content': 0.12527655065059662, 'timestamp': '2025-10-01 04:15:49.809008', 'step': 2526, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:49.845270', 'step': 2526, 'epoch': 1} {'type': 'loss', 'content': 0.25390198826789856, 'timestamp': '2025-10-01 04:15:49.847587', 'step': 2527, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:49.892689', 'step': 2527, 'epoch': 1} {'type': 'loss', 'content': 0.2691558003425598, 'timestamp': '2025-10-01 04:15:49.916658', 'step': 2528, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:15:49.948901', 'step': 2528, 'epoch': 1} {'type': 'loss', 'content': 0.18434090912342072, 'timestamp': '2025-10-01 04:15:49.951137', 'step': 2529, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:49.991792', 'step': 2529, 'epoch': 1} {'type': 'loss', 'content': 0.21050241589546204, 'timestamp': '2025-10-01 04:15:49.993874', 'step': 2530, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:15:50.032570', 'step': 2530, 'epoch': 1} {'type': 'loss', 'content': 0.2063066065311432, 'timestamp': '2025-10-01 04:15:50.034974', 'step': 2531, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:50.065800', 'step': 2531, 'epoch': 1} {'type': 'loss', 'content': 0.19067247211933136, 'timestamp': '2025-10-01 04:15:50.096064', 'step': 2532, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:50.126946', 'step': 2532, 'epoch': 1} {'type': 'loss', 'content': 0.17889969050884247, 'timestamp': '2025-10-01 04:15:50.129258', 'step': 2533, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:50.161685', 'step': 2533, 'epoch': 1} {'type': 'loss', 'content': 0.13619481027126312, 'timestamp': '2025-10-01 04:15:50.164107', 'step': 2534, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:50.204326', 'step': 2534, 'epoch': 1} {'type': 'loss', 'content': 0.17046986520290375, 'timestamp': '2025-10-01 04:15:50.206996', 'step': 2535, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:50.241227', 'step': 2535, 'epoch': 1} {'type': 'loss', 'content': 0.22672085464000702, 'timestamp': '2025-10-01 04:15:50.264855', 'step': 2536, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:50.302141', 'step': 2536, 'epoch': 1} {'type': 'loss', 'content': 0.1025366261601448, 'timestamp': '2025-10-01 04:15:50.304227', 'step': 2537, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:50.335252', 'step': 2537, 'epoch': 1} {'type': 'loss', 'content': 0.10523822903633118, 'timestamp': '2025-10-01 04:15:50.337353', 'step': 2538, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:50.369530', 'step': 2538, 'epoch': 1} {'type': 'loss', 'content': 0.1839262992143631, 'timestamp': '2025-10-01 04:15:50.372288', 'step': 2539, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:50.402883', 'step': 2539, 'epoch': 1} {'type': 'loss', 'content': 0.1890396922826767, 'timestamp': '2025-10-01 04:15:50.427478', 'step': 2540, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:15:50.467337', 'step': 2540, 'epoch': 1} {'type': 'loss', 'content': 0.19816048443317413, 'timestamp': '2025-10-01 04:15:50.478691', 'step': 2541, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:50.510866', 'step': 2541, 'epoch': 1} {'type': 'loss', 'content': 0.1119389608502388, 'timestamp': '2025-10-01 04:15:50.513081', 'step': 2542, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:50.554010', 'step': 2542, 'epoch': 1} {'type': 'loss', 'content': 0.12286239117383957, 'timestamp': '2025-10-01 04:15:50.562047', 'step': 2543, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:50.592288', 'step': 2543, 'epoch': 1} {'type': 'loss', 'content': 0.17933352291584015, 'timestamp': '2025-10-01 04:15:50.615961', 'step': 2544, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:50.651929', 'step': 2544, 'epoch': 1} {'type': 'loss', 'content': 0.17532776296138763, 'timestamp': '2025-10-01 04:15:50.654898', 'step': 2545, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:50.686523', 'step': 2545, 'epoch': 1} {'type': 'loss', 'content': 0.14801354706287384, 'timestamp': '2025-10-01 04:15:50.688632', 'step': 2546, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:50.719875', 'step': 2546, 'epoch': 1} {'type': 'loss', 'content': 0.18992803990840912, 'timestamp': '2025-10-01 04:15:50.721991', 'step': 2547, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:50.752191', 'step': 2547, 'epoch': 1} {'type': 'loss', 'content': 0.25773102045059204, 'timestamp': '2025-10-01 04:15:50.776372', 'step': 2548, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:50.813276', 'step': 2548, 'epoch': 1} {'type': 'loss', 'content': 0.22106516361236572, 'timestamp': '2025-10-01 04:15:50.815894', 'step': 2549, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:15:50.849542', 'step': 2549, 'epoch': 1} {'type': 'loss', 'content': 0.11590306460857391, 'timestamp': '2025-10-01 04:15:50.861592', 'step': 2550, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:50.911163', 'step': 2550, 'epoch': 1} {'type': 'loss', 'content': 0.21355880796909332, 'timestamp': '2025-10-01 04:15:50.913554', 'step': 2551, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:50.946516', 'step': 2551, 'epoch': 1} {'type': 'loss', 'content': 0.11162373423576355, 'timestamp': '2025-10-01 04:15:50.982502', 'step': 2552, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:51.022800', 'step': 2552, 'epoch': 1} {'type': 'loss', 'content': 0.2178141325712204, 'timestamp': '2025-10-01 04:15:51.025362', 'step': 2553, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:15:51.056118', 'step': 2553, 'epoch': 1} {'type': 'loss', 'content': 0.17405278980731964, 'timestamp': '2025-10-01 04:15:51.059096', 'step': 2554, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:51.089561', 'step': 2554, 'epoch': 1} {'type': 'loss', 'content': 0.1964239776134491, 'timestamp': '2025-10-01 04:15:51.093703', 'step': 2555, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:51.131348', 'step': 2555, 'epoch': 1} {'type': 'loss', 'content': 0.14587104320526123, 'timestamp': '2025-10-01 04:15:51.155114', 'step': 2556, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:51.186974', 'step': 2556, 'epoch': 1} {'type': 'loss', 'content': 0.1378760039806366, 'timestamp': '2025-10-01 04:15:51.189586', 'step': 2557, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:15:51.222066', 'step': 2557, 'epoch': 1} {'type': 'loss', 'content': 0.14047732949256897, 'timestamp': '2025-10-01 04:15:51.224622', 'step': 2558, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:51.258332', 'step': 2558, 'epoch': 1} {'type': 'loss', 'content': 0.09415270388126373, 'timestamp': '2025-10-01 04:15:51.260858', 'step': 2559, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:51.292078', 'step': 2559, 'epoch': 1} {'type': 'loss', 'content': 0.22096474468708038, 'timestamp': '2025-10-01 04:15:51.317961', 'step': 2560, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:51.355192', 'step': 2560, 'epoch': 1} {'type': 'loss', 'content': 0.12515173852443695, 'timestamp': '2025-10-01 04:15:51.357354', 'step': 2561, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:15:51.388451', 'step': 2561, 'epoch': 1} {'type': 'loss', 'content': 0.06449756771326065, 'timestamp': '2025-10-01 04:15:51.391298', 'step': 2562, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:51.422604', 'step': 2562, 'epoch': 1} {'type': 'loss', 'content': 0.09024299681186676, 'timestamp': '2025-10-01 04:15:51.428900', 'step': 2563, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:15:51.473039', 'step': 2563, 'epoch': 1} {'type': 'loss', 'content': 0.08605235815048218, 'timestamp': '2025-10-01 04:15:51.507165', 'step': 2564, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:51.538955', 'step': 2564, 'epoch': 1} {'type': 'loss', 'content': 0.15869390964508057, 'timestamp': '2025-10-01 04:15:51.542293', 'step': 2565, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:15:51.573053', 'step': 2565, 'epoch': 1} {'type': 'loss', 'content': 0.1346846967935562, 'timestamp': '2025-10-01 04:15:51.580002', 'step': 2566, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:51.610997', 'step': 2566, 'epoch': 1} {'type': 'loss', 'content': 0.14865262806415558, 'timestamp': '2025-10-01 04:15:51.613968', 'step': 2567, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:51.644569', 'step': 2567, 'epoch': 1} {'type': 'loss', 'content': 0.14427122473716736, 'timestamp': '2025-10-01 04:15:51.668398', 'step': 2568, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:51.706091', 'step': 2568, 'epoch': 1} {'type': 'loss', 'content': 0.11194980144500732, 'timestamp': '2025-10-01 04:15:51.709032', 'step': 2569, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:51.740360', 'step': 2569, 'epoch': 1} {'type': 'loss', 'content': 0.16278864443302155, 'timestamp': '2025-10-01 04:15:51.748882', 'step': 2570, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:15:51.780396', 'step': 2570, 'epoch': 1} {'type': 'loss', 'content': 0.25541627407073975, 'timestamp': '2025-10-01 04:15:51.783021', 'step': 2571, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:51.814020', 'step': 2571, 'epoch': 1} {'type': 'loss', 'content': 0.14322422444820404, 'timestamp': '2025-10-01 04:15:51.838081', 'step': 2572, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:51.872965', 'step': 2572, 'epoch': 1} {'type': 'loss', 'content': 0.09681352972984314, 'timestamp': '2025-10-01 04:15:51.875102', 'step': 2573, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:51.917504', 'step': 2573, 'epoch': 1} {'type': 'loss', 'content': 0.13118642568588257, 'timestamp': '2025-10-01 04:15:51.919780', 'step': 2574, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:15:51.955114', 'step': 2574, 'epoch': 1} {'type': 'loss', 'content': 0.1531275510787964, 'timestamp': '2025-10-01 04:15:51.959432', 'step': 2575, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:51.991834', 'step': 2575, 'epoch': 1} {'type': 'loss', 'content': 0.08200124651193619, 'timestamp': '2025-10-01 04:15:52.015729', 'step': 2576, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:52.058060', 'step': 2576, 'epoch': 1} {'type': 'loss', 'content': 0.10965947061777115, 'timestamp': '2025-10-01 04:15:52.060531', 'step': 2577, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:15:52.095322', 'step': 2577, 'epoch': 1} {'type': 'loss', 'content': 0.2592547833919525, 'timestamp': '2025-10-01 04:15:52.097904', 'step': 2578, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:52.137278', 'step': 2578, 'epoch': 1} {'type': 'loss', 'content': 0.23083235323429108, 'timestamp': '2025-10-01 04:15:52.139605', 'step': 2579, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:52.174547', 'step': 2579, 'epoch': 1} {'type': 'loss', 'content': 0.09078487753868103, 'timestamp': '2025-10-01 04:15:52.198505', 'step': 2580, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:52.243589', 'step': 2580, 'epoch': 1} {'type': 'loss', 'content': 0.18246717751026154, 'timestamp': '2025-10-01 04:15:52.246051', 'step': 2581, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:52.288286', 'step': 2581, 'epoch': 1} {'type': 'loss', 'content': 0.18737611174583435, 'timestamp': '2025-10-01 04:15:52.292177', 'step': 2582, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:15:52.323627', 'step': 2582, 'epoch': 1} {'type': 'loss', 'content': 0.15701255202293396, 'timestamp': '2025-10-01 04:15:52.330324', 'step': 2583, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:52.375474', 'step': 2583, 'epoch': 1} {'type': 'loss', 'content': 0.18215765058994293, 'timestamp': '2025-10-01 04:15:52.399531', 'step': 2584, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:15:52.466133', 'step': 2584, 'epoch': 1} {'type': 'loss', 'content': 0.1519576907157898, 'timestamp': '2025-10-01 04:15:52.468434', 'step': 2585, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:52.526205', 'step': 2585, 'epoch': 1} {'type': 'loss', 'content': 0.14579059183597565, 'timestamp': '2025-10-01 04:15:52.528640', 'step': 2586, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:52.564682', 'step': 2586, 'epoch': 1} {'type': 'loss', 'content': 0.16626930236816406, 'timestamp': '2025-10-01 04:15:52.566912', 'step': 2587, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:52.604390', 'step': 2587, 'epoch': 1} {'type': 'loss', 'content': 0.18029284477233887, 'timestamp': '2025-10-01 04:15:52.641594', 'step': 2588, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:52.701855', 'step': 2588, 'epoch': 1} {'type': 'loss', 'content': 0.15715649724006653, 'timestamp': '2025-10-01 04:15:52.716443', 'step': 2589, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:52.756687', 'step': 2589, 'epoch': 1} {'type': 'loss', 'content': 0.13106726109981537, 'timestamp': '2025-10-01 04:15:52.773794', 'step': 2590, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:52.809295', 'step': 2590, 'epoch': 1} {'type': 'loss', 'content': 0.19121046364307404, 'timestamp': '2025-10-01 04:15:52.811591', 'step': 2591, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:52.856763', 'step': 2591, 'epoch': 1} {'type': 'loss', 'content': 0.143739253282547, 'timestamp': '2025-10-01 04:15:52.881720', 'step': 2592, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:52.915562', 'step': 2592, 'epoch': 1} {'type': 'loss', 'content': 0.17542490363121033, 'timestamp': '2025-10-01 04:15:52.918147', 'step': 2593, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:52.966576', 'step': 2593, 'epoch': 1} {'type': 'loss', 'content': 0.15941190719604492, 'timestamp': '2025-10-01 04:15:52.968983', 'step': 2594, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:53.016944', 'step': 2594, 'epoch': 1} {'type': 'loss', 'content': 0.06273911893367767, 'timestamp': '2025-10-01 04:15:53.026729', 'step': 2595, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:53.076455', 'step': 2595, 'epoch': 1} {'type': 'loss', 'content': 0.30167117714881897, 'timestamp': '2025-10-01 04:15:53.100091', 'step': 2596, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:53.156462', 'step': 2596, 'epoch': 1} {'type': 'loss', 'content': 0.08732037991285324, 'timestamp': '2025-10-01 04:15:53.158765', 'step': 2597, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:53.206517', 'step': 2597, 'epoch': 1} {'type': 'loss', 'content': 0.11029504984617233, 'timestamp': '2025-10-01 04:15:53.208733', 'step': 2598, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:53.250857', 'step': 2598, 'epoch': 1} {'type': 'loss', 'content': 0.17130671441555023, 'timestamp': '2025-10-01 04:15:53.254923', 'step': 2599, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:53.302832', 'step': 2599, 'epoch': 1} {'type': 'loss', 'content': 0.2156599462032318, 'timestamp': '2025-10-01 04:15:53.328429', 'step': 2600, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:53.366887', 'step': 2600, 'epoch': 1} {'type': 'loss', 'content': 0.12395000457763672, 'timestamp': '2025-10-01 04:15:53.370415', 'step': 2601, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:53.406573', 'step': 2601, 'epoch': 1} {'type': 'loss', 'content': 0.19314147531986237, 'timestamp': '2025-10-01 04:15:53.408754', 'step': 2602, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:53.442073', 'step': 2602, 'epoch': 1} {'type': 'loss', 'content': 0.1313468962907791, 'timestamp': '2025-10-01 04:15:53.450795', 'step': 2603, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:53.492480', 'step': 2603, 'epoch': 1} {'type': 'loss', 'content': 0.13248179852962494, 'timestamp': '2025-10-01 04:15:53.516117', 'step': 2604, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:15:53.562364', 'step': 2604, 'epoch': 1} {'type': 'loss', 'content': 0.17672115564346313, 'timestamp': '2025-10-01 04:15:53.565053', 'step': 2605, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:15:53.620862', 'step': 2605, 'epoch': 1} {'type': 'loss', 'content': 0.2448415756225586, 'timestamp': '2025-10-01 04:15:53.623353', 'step': 2606, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:53.664693', 'step': 2606, 'epoch': 1} {'type': 'loss', 'content': 0.21354526281356812, 'timestamp': '2025-10-01 04:15:53.667216', 'step': 2607, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:53.701354', 'step': 2607, 'epoch': 1} {'type': 'loss', 'content': 0.10150846838951111, 'timestamp': '2025-10-01 04:15:53.725360', 'step': 2608, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:53.758709', 'step': 2608, 'epoch': 1} {'type': 'loss', 'content': 0.21317066252231598, 'timestamp': '2025-10-01 04:15:53.760604', 'step': 2609, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:53.811031', 'step': 2609, 'epoch': 1} {'type': 'loss', 'content': 0.17876003682613373, 'timestamp': '2025-10-01 04:15:53.819736', 'step': 2610, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:53.857005', 'step': 2610, 'epoch': 1} {'type': 'loss', 'content': 0.1422460973262787, 'timestamp': '2025-10-01 04:15:53.872658', 'step': 2611, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:53.905057', 'step': 2611, 'epoch': 1} {'type': 'loss', 'content': 0.1404336839914322, 'timestamp': '2025-10-01 04:15:53.928731', 'step': 2612, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:15:53.962826', 'step': 2612, 'epoch': 1} {'type': 'loss', 'content': 0.18359951674938202, 'timestamp': '2025-10-01 04:15:53.964915', 'step': 2613, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:54.000605', 'step': 2613, 'epoch': 1} {'type': 'loss', 'content': 0.10882647335529327, 'timestamp': '2025-10-01 04:15:54.003050', 'step': 2614, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:54.035250', 'step': 2614, 'epoch': 1} {'type': 'loss', 'content': 0.12914952635765076, 'timestamp': '2025-10-01 04:15:54.037878', 'step': 2615, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:54.071278', 'step': 2615, 'epoch': 1} {'type': 'loss', 'content': 0.29391491413116455, 'timestamp': '2025-10-01 04:15:54.094811', 'step': 2616, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:15:54.132282', 'step': 2616, 'epoch': 1} {'type': 'loss', 'content': 0.3162773847579956, 'timestamp': '2025-10-01 04:15:54.135543', 'step': 2617, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:54.169602', 'step': 2617, 'epoch': 1} {'type': 'loss', 'content': 0.13760049641132355, 'timestamp': '2025-10-01 04:15:54.171773', 'step': 2618, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:54.204582', 'step': 2618, 'epoch': 1} {'type': 'loss', 'content': 0.21373718976974487, 'timestamp': '2025-10-01 04:15:54.206819', 'step': 2619, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:15:54.237450', 'step': 2619, 'epoch': 1} {'type': 'loss', 'content': 0.20208147168159485, 'timestamp': '2025-10-01 04:15:54.261228', 'step': 2620, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:54.294246', 'step': 2620, 'epoch': 1} {'type': 'loss', 'content': 0.1169421598315239, 'timestamp': '2025-10-01 04:15:54.296534', 'step': 2621, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:54.329918', 'step': 2621, 'epoch': 1} {'type': 'loss', 'content': 0.1052127331495285, 'timestamp': '2025-10-01 04:15:54.332447', 'step': 2622, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:54.366973', 'step': 2622, 'epoch': 1} {'type': 'loss', 'content': 0.14815710484981537, 'timestamp': '2025-10-01 04:15:54.369267', 'step': 2623, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:54.402465', 'step': 2623, 'epoch': 1} {'type': 'loss', 'content': 0.2316153645515442, 'timestamp': '2025-10-01 04:15:54.426231', 'step': 2624, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:54.471680', 'step': 2624, 'epoch': 1} {'type': 'loss', 'content': 0.1314283162355423, 'timestamp': '2025-10-01 04:15:54.473622', 'step': 2625, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:15:54.506195', 'step': 2625, 'epoch': 1} {'type': 'loss', 'content': 0.11553539335727692, 'timestamp': '2025-10-01 04:15:54.508704', 'step': 2626, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:54.549875', 'step': 2626, 'epoch': 1} {'type': 'loss', 'content': 0.16223359107971191, 'timestamp': '2025-10-01 04:15:54.551986', 'step': 2627, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:54.582543', 'step': 2627, 'epoch': 1} {'type': 'loss', 'content': 0.16105879843235016, 'timestamp': '2025-10-01 04:15:54.606120', 'step': 2628, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:15:54.637951', 'step': 2628, 'epoch': 1} {'type': 'loss', 'content': 0.19029183685779572, 'timestamp': '2025-10-01 04:15:54.640633', 'step': 2629, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:54.670987', 'step': 2629, 'epoch': 1} {'type': 'loss', 'content': 0.08425875008106232, 'timestamp': '2025-10-01 04:15:54.673594', 'step': 2630, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:54.706551', 'step': 2630, 'epoch': 1} {'type': 'loss', 'content': 0.2106136828660965, 'timestamp': '2025-10-01 04:15:54.708559', 'step': 2631, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:54.744717', 'step': 2631, 'epoch': 1} {'type': 'loss', 'content': 0.13834503293037415, 'timestamp': '2025-10-01 04:15:54.768438', 'step': 2632, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:54.799671', 'step': 2632, 'epoch': 1} {'type': 'loss', 'content': 0.24660925567150116, 'timestamp': '2025-10-01 04:15:54.801792', 'step': 2633, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:54.838408', 'step': 2633, 'epoch': 1} {'type': 'loss', 'content': 0.16530610620975494, 'timestamp': '2025-10-01 04:15:54.840520', 'step': 2634, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:54.875921', 'step': 2634, 'epoch': 1} {'type': 'loss', 'content': 0.1603012979030609, 'timestamp': '2025-10-01 04:15:54.878180', 'step': 2635, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:54.911082', 'step': 2635, 'epoch': 1} {'type': 'loss', 'content': 0.1884026825428009, 'timestamp': '2025-10-01 04:15:54.938570', 'step': 2636, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:54.972494', 'step': 2636, 'epoch': 1} {'type': 'loss', 'content': 0.12078143656253815, 'timestamp': '2025-10-01 04:15:54.974772', 'step': 2637, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:55.008037', 'step': 2637, 'epoch': 1} {'type': 'loss', 'content': 0.15293292701244354, 'timestamp': '2025-10-01 04:15:55.011143', 'step': 2638, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:55.043975', 'step': 2638, 'epoch': 1} {'type': 'loss', 'content': 0.15843629837036133, 'timestamp': '2025-10-01 04:15:55.059753', 'step': 2639, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:55.093524', 'step': 2639, 'epoch': 1} {'type': 'loss', 'content': 0.13147379457950592, 'timestamp': '2025-10-01 04:15:55.117105', 'step': 2640, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:55.163817', 'step': 2640, 'epoch': 1} {'type': 'loss', 'content': 0.2445899099111557, 'timestamp': '2025-10-01 04:15:55.165941', 'step': 2641, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:55.203399', 'step': 2641, 'epoch': 1} {'type': 'loss', 'content': 0.1955752968788147, 'timestamp': '2025-10-01 04:15:55.205525', 'step': 2642, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:15:55.238457', 'step': 2642, 'epoch': 1} {'type': 'loss', 'content': 0.14184123277664185, 'timestamp': '2025-10-01 04:15:55.241090', 'step': 2643, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:55.282803', 'step': 2643, 'epoch': 1} {'type': 'loss', 'content': 0.08429314941167831, 'timestamp': '2025-10-01 04:15:55.306472', 'step': 2644, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:55.337110', 'step': 2644, 'epoch': 1} {'type': 'loss', 'content': 0.11953791230916977, 'timestamp': '2025-10-01 04:15:55.340060', 'step': 2645, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:55.373892', 'step': 2645, 'epoch': 1} {'type': 'loss', 'content': 0.1367945373058319, 'timestamp': '2025-10-01 04:15:55.376282', 'step': 2646, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:55.407495', 'step': 2646, 'epoch': 1} {'type': 'loss', 'content': 0.12728603184223175, 'timestamp': '2025-10-01 04:15:55.409552', 'step': 2647, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:55.440028', 'step': 2647, 'epoch': 1} {'type': 'loss', 'content': 0.1353546679019928, 'timestamp': '2025-10-01 04:15:55.463681', 'step': 2648, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:55.494703', 'step': 2648, 'epoch': 1} {'type': 'loss', 'content': 0.12093738466501236, 'timestamp': '2025-10-01 04:15:55.496874', 'step': 2649, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:55.527315', 'step': 2649, 'epoch': 1} {'type': 'loss', 'content': 0.21822489798069, 'timestamp': '2025-10-01 04:15:55.529669', 'step': 2650, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:55.560396', 'step': 2650, 'epoch': 1} {'type': 'loss', 'content': 0.2565247714519501, 'timestamp': '2025-10-01 04:15:55.562742', 'step': 2651, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:55.592925', 'step': 2651, 'epoch': 1} {'type': 'loss', 'content': 0.21903319656848907, 'timestamp': '2025-10-01 04:15:55.616521', 'step': 2652, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:55.647390', 'step': 2652, 'epoch': 1} {'type': 'loss', 'content': 0.125864639878273, 'timestamp': '2025-10-01 04:15:55.649542', 'step': 2653, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:55.681239', 'step': 2653, 'epoch': 1} {'type': 'loss', 'content': 0.08351192623376846, 'timestamp': '2025-10-01 04:15:55.684054', 'step': 2654, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:15:55.717958', 'step': 2654, 'epoch': 1} {'type': 'loss', 'content': 0.2518560588359833, 'timestamp': '2025-10-01 04:15:55.721045', 'step': 2655, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:55.752084', 'step': 2655, 'epoch': 1} {'type': 'loss', 'content': 0.13657832145690918, 'timestamp': '2025-10-01 04:15:55.775736', 'step': 2656, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:15:55.807866', 'step': 2656, 'epoch': 1} {'type': 'loss', 'content': 0.12161482125520706, 'timestamp': '2025-10-01 04:15:55.809982', 'step': 2657, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:55.843181', 'step': 2657, 'epoch': 1} {'type': 'loss', 'content': 0.14511878788471222, 'timestamp': '2025-10-01 04:15:55.845446', 'step': 2658, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:55.876685', 'step': 2658, 'epoch': 1} {'type': 'loss', 'content': 0.1300472915172577, 'timestamp': '2025-10-01 04:15:55.879241', 'step': 2659, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:55.913614', 'step': 2659, 'epoch': 1} {'type': 'loss', 'content': 0.19186095893383026, 'timestamp': '2025-10-01 04:15:55.937495', 'step': 2660, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:55.968058', 'step': 2660, 'epoch': 1} {'type': 'loss', 'content': 0.2868803143501282, 'timestamp': '2025-10-01 04:15:55.970279', 'step': 2661, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:56.004527', 'step': 2661, 'epoch': 1} {'type': 'loss', 'content': 0.17954155802726746, 'timestamp': '2025-10-01 04:15:56.007007', 'step': 2662, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:15:56.037294', 'step': 2662, 'epoch': 1} {'type': 'loss', 'content': 0.14508116245269775, 'timestamp': '2025-10-01 04:15:56.039723', 'step': 2663, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:56.069939', 'step': 2663, 'epoch': 1} {'type': 'loss', 'content': 0.164988711476326, 'timestamp': '2025-10-01 04:15:56.093665', 'step': 2664, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:56.124346', 'step': 2664, 'epoch': 1} {'type': 'loss', 'content': 0.18708346784114838, 'timestamp': '2025-10-01 04:15:56.126662', 'step': 2665, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:56.157236', 'step': 2665, 'epoch': 1} {'type': 'loss', 'content': 0.12150920182466507, 'timestamp': '2025-10-01 04:15:56.159544', 'step': 2666, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:15:56.191372', 'step': 2666, 'epoch': 1} {'type': 'loss', 'content': 0.19343721866607666, 'timestamp': '2025-10-01 04:15:56.194768', 'step': 2667, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:56.225203', 'step': 2667, 'epoch': 1} {'type': 'loss', 'content': 0.16720448434352875, 'timestamp': '2025-10-01 04:15:56.249016', 'step': 2668, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:15:56.280985', 'step': 2668, 'epoch': 1} {'type': 'loss', 'content': 0.2241346389055252, 'timestamp': '2025-10-01 04:15:56.283083', 'step': 2669, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:56.314130', 'step': 2669, 'epoch': 1} {'type': 'loss', 'content': 0.1064690575003624, 'timestamp': '2025-10-01 04:15:56.316066', 'step': 2670, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:56.346615', 'step': 2670, 'epoch': 1} {'type': 'loss', 'content': 0.157973974943161, 'timestamp': '2025-10-01 04:15:56.357051', 'step': 2671, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:56.387136', 'step': 2671, 'epoch': 1} {'type': 'loss', 'content': 0.11908882111310959, 'timestamp': '2025-10-01 04:15:56.410997', 'step': 2672, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:56.441074', 'step': 2672, 'epoch': 1} {'type': 'loss', 'content': 0.13724258542060852, 'timestamp': '2025-10-01 04:15:56.443256', 'step': 2673, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:56.473851', 'step': 2673, 'epoch': 1} {'type': 'loss', 'content': 0.12586680054664612, 'timestamp': '2025-10-01 04:15:56.475954', 'step': 2674, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:56.505959', 'step': 2674, 'epoch': 1} {'type': 'loss', 'content': 0.17961633205413818, 'timestamp': '2025-10-01 04:15:56.508463', 'step': 2675, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-10-01 04:15:56.549048', 'step': 2675, 'epoch': 1} {'type': 'loss', 'content': 0.11803998053073883, 'timestamp': '2025-10-01 04:15:56.574747', 'step': 2676, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:56.604647', 'step': 2676, 'epoch': 1} {'type': 'loss', 'content': 0.14152371883392334, 'timestamp': '2025-10-01 04:15:56.606887', 'step': 2677, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:56.637612', 'step': 2677, 'epoch': 1} {'type': 'loss', 'content': 0.12206736207008362, 'timestamp': '2025-10-01 04:15:56.641114', 'step': 2678, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:56.671476', 'step': 2678, 'epoch': 1} {'type': 'loss', 'content': 0.1625061184167862, 'timestamp': '2025-10-01 04:15:56.673776', 'step': 2679, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:56.704301', 'step': 2679, 'epoch': 1} {'type': 'loss', 'content': 0.18768273293972015, 'timestamp': '2025-10-01 04:15:56.728056', 'step': 2680, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:56.758120', 'step': 2680, 'epoch': 1} {'type': 'loss', 'content': 0.08887756615877151, 'timestamp': '2025-10-01 04:15:56.760246', 'step': 2681, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:15:56.791686', 'step': 2681, 'epoch': 1} {'type': 'loss', 'content': 0.16909313201904297, 'timestamp': '2025-10-01 04:15:56.794059', 'step': 2682, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:56.826856', 'step': 2682, 'epoch': 1} {'type': 'loss', 'content': 0.1695491075515747, 'timestamp': '2025-10-01 04:15:56.829053', 'step': 2683, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:15:56.861616', 'step': 2683, 'epoch': 1} {'type': 'loss', 'content': 0.17388327419757843, 'timestamp': '2025-10-01 04:15:56.885945', 'step': 2684, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:56.917328', 'step': 2684, 'epoch': 1} {'type': 'loss', 'content': 0.11750571429729462, 'timestamp': '2025-10-01 04:15:56.919602', 'step': 2685, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:56.953947', 'step': 2685, 'epoch': 1} {'type': 'loss', 'content': 0.17324435710906982, 'timestamp': '2025-10-01 04:15:56.956027', 'step': 2686, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:56.988193', 'step': 2686, 'epoch': 1} {'type': 'loss', 'content': 0.14909176528453827, 'timestamp': '2025-10-01 04:15:56.990355', 'step': 2687, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:57.022178', 'step': 2687, 'epoch': 1} {'type': 'loss', 'content': 0.12886938452720642, 'timestamp': '2025-10-01 04:15:57.045840', 'step': 2688, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:57.077429', 'step': 2688, 'epoch': 1} {'type': 'loss', 'content': 0.12745609879493713, 'timestamp': '2025-10-01 04:15:57.079513', 'step': 2689, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:57.111570', 'step': 2689, 'epoch': 1} {'type': 'loss', 'content': 0.171939879655838, 'timestamp': '2025-10-01 04:15:57.113751', 'step': 2690, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:57.144739', 'step': 2690, 'epoch': 1} {'type': 'loss', 'content': 0.16955353319644928, 'timestamp': '2025-10-01 04:15:57.147212', 'step': 2691, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:57.179829', 'step': 2691, 'epoch': 1} {'type': 'loss', 'content': 0.22444438934326172, 'timestamp': '2025-10-01 04:15:57.203788', 'step': 2692, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:57.235010', 'step': 2692, 'epoch': 1} {'type': 'loss', 'content': 0.15786920487880707, 'timestamp': '2025-10-01 04:15:57.237332', 'step': 2693, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:15:57.270495', 'step': 2693, 'epoch': 1} {'type': 'loss', 'content': 0.16974709928035736, 'timestamp': '2025-10-01 04:15:57.272760', 'step': 2694, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:57.306356', 'step': 2694, 'epoch': 1} {'type': 'loss', 'content': 0.14366626739501953, 'timestamp': '2025-10-01 04:15:57.308892', 'step': 2695, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:57.342598', 'step': 2695, 'epoch': 1} {'type': 'loss', 'content': 0.13529878854751587, 'timestamp': '2025-10-01 04:15:57.366619', 'step': 2696, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:57.400053', 'step': 2696, 'epoch': 1} {'type': 'loss', 'content': 0.13336172699928284, 'timestamp': '2025-10-01 04:15:57.402235', 'step': 2697, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:15:57.434684', 'step': 2697, 'epoch': 1} {'type': 'loss', 'content': 0.16538451611995697, 'timestamp': '2025-10-01 04:15:57.437188', 'step': 2698, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:15:57.468746', 'step': 2698, 'epoch': 1} {'type': 'loss', 'content': 0.18819841742515564, 'timestamp': '2025-10-01 04:15:57.471574', 'step': 2699, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:57.502630', 'step': 2699, 'epoch': 1} {'type': 'loss', 'content': 0.1741286665201187, 'timestamp': '2025-10-01 04:15:57.526859', 'step': 2700, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:57.557908', 'step': 2700, 'epoch': 1} {'type': 'loss', 'content': 0.10367550700902939, 'timestamp': '2025-10-01 04:15:57.560042', 'step': 2701, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:57.589978', 'step': 2701, 'epoch': 1} {'type': 'loss', 'content': 0.1829526126384735, 'timestamp': '2025-10-01 04:15:57.592499', 'step': 2702, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:57.626838', 'step': 2702, 'epoch': 1} {'type': 'loss', 'content': 0.267117440700531, 'timestamp': '2025-10-01 04:15:57.629076', 'step': 2703, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:57.660199', 'step': 2703, 'epoch': 1} {'type': 'loss', 'content': 0.1798599809408188, 'timestamp': '2025-10-01 04:15:57.683809', 'step': 2704, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:57.715736', 'step': 2704, 'epoch': 1} {'type': 'loss', 'content': 0.19742916524410248, 'timestamp': '2025-10-01 04:15:57.717816', 'step': 2705, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:57.757260', 'step': 2705, 'epoch': 1} {'type': 'loss', 'content': 0.1534760445356369, 'timestamp': '2025-10-01 04:15:57.759396', 'step': 2706, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:57.791310', 'step': 2706, 'epoch': 1} {'type': 'loss', 'content': 0.2544996440410614, 'timestamp': '2025-10-01 04:15:57.793480', 'step': 2707, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:15:57.823175', 'step': 2707, 'epoch': 1} {'type': 'loss', 'content': 0.20008301734924316, 'timestamp': '2025-10-01 04:15:57.846967', 'step': 2708, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:15:57.878137', 'step': 2708, 'epoch': 1} {'type': 'loss', 'content': 0.21413268148899078, 'timestamp': '2025-10-01 04:15:57.880156', 'step': 2709, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:57.918206', 'step': 2709, 'epoch': 1} {'type': 'loss', 'content': 0.23552188277244568, 'timestamp': '2025-10-01 04:15:57.928563', 'step': 2710, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:57.959087', 'step': 2710, 'epoch': 1} {'type': 'loss', 'content': 0.17334653437137604, 'timestamp': '2025-10-01 04:15:57.962480', 'step': 2711, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:57.994733', 'step': 2711, 'epoch': 1} {'type': 'loss', 'content': 0.221593976020813, 'timestamp': '2025-10-01 04:15:58.018861', 'step': 2712, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:15:58.050082', 'step': 2712, 'epoch': 1} {'type': 'loss', 'content': 0.22279633581638336, 'timestamp': '2025-10-01 04:15:58.052379', 'step': 2713, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:58.082890', 'step': 2713, 'epoch': 1} {'type': 'loss', 'content': 0.12688256800174713, 'timestamp': '2025-10-01 04:15:58.084903', 'step': 2714, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:58.116136', 'step': 2714, 'epoch': 1} {'type': 'loss', 'content': 0.19122444093227386, 'timestamp': '2025-10-01 04:15:58.118288', 'step': 2715, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:15:58.149597', 'step': 2715, 'epoch': 1} {'type': 'loss', 'content': 0.12080132216215134, 'timestamp': '2025-10-01 04:15:58.173309', 'step': 2716, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:58.204508', 'step': 2716, 'epoch': 1} {'type': 'loss', 'content': 0.12484323233366013, 'timestamp': '2025-10-01 04:15:58.206599', 'step': 2717, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:58.240686', 'step': 2717, 'epoch': 1} {'type': 'loss', 'content': 0.18630392849445343, 'timestamp': '2025-10-01 04:15:58.242900', 'step': 2718, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:58.274290', 'step': 2718, 'epoch': 1} {'type': 'loss', 'content': 0.13558408617973328, 'timestamp': '2025-10-01 04:15:58.276451', 'step': 2719, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:58.309167', 'step': 2719, 'epoch': 1} {'type': 'loss', 'content': 0.29758843779563904, 'timestamp': '2025-10-01 04:15:58.333028', 'step': 2720, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:15:58.365254', 'step': 2720, 'epoch': 1} {'type': 'loss', 'content': 0.2366100400686264, 'timestamp': '2025-10-01 04:15:58.367406', 'step': 2721, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:58.396986', 'step': 2721, 'epoch': 1} {'type': 'loss', 'content': 0.165525421500206, 'timestamp': '2025-10-01 04:15:58.400165', 'step': 2722, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:58.430170', 'step': 2722, 'epoch': 1} {'type': 'loss', 'content': 0.18717053532600403, 'timestamp': '2025-10-01 04:15:58.433597', 'step': 2723, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:58.465139', 'step': 2723, 'epoch': 1} {'type': 'loss', 'content': 0.17817771434783936, 'timestamp': '2025-10-01 04:15:58.493769', 'step': 2724, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:58.523600', 'step': 2724, 'epoch': 1} {'type': 'loss', 'content': 0.2104490101337433, 'timestamp': '2025-10-01 04:15:58.525724', 'step': 2725, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:58.556019', 'step': 2725, 'epoch': 1} {'type': 'loss', 'content': 0.12429971992969513, 'timestamp': '2025-10-01 04:15:58.558413', 'step': 2726, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:58.589173', 'step': 2726, 'epoch': 1} {'type': 'loss', 'content': 0.17898285388946533, 'timestamp': '2025-10-01 04:15:58.591428', 'step': 2727, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:58.621993', 'step': 2727, 'epoch': 1} {'type': 'loss', 'content': 0.23768754303455353, 'timestamp': '2025-10-01 04:15:58.645637', 'step': 2728, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:58.677292', 'step': 2728, 'epoch': 1} {'type': 'loss', 'content': 0.14138399064540863, 'timestamp': '2025-10-01 04:15:58.679473', 'step': 2729, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:58.709554', 'step': 2729, 'epoch': 1} {'type': 'loss', 'content': 0.13644106686115265, 'timestamp': '2025-10-01 04:15:58.711767', 'step': 2730, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:58.744860', 'step': 2730, 'epoch': 1} {'type': 'loss', 'content': 0.24411499500274658, 'timestamp': '2025-10-01 04:15:58.746915', 'step': 2731, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:58.783622', 'step': 2731, 'epoch': 1} {'type': 'loss', 'content': 0.14060398936271667, 'timestamp': '2025-10-01 04:15:58.807318', 'step': 2732, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:15:58.837678', 'step': 2732, 'epoch': 1} {'type': 'loss', 'content': 0.13750453293323517, 'timestamp': '2025-10-01 04:15:58.839619', 'step': 2733, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:58.870127', 'step': 2733, 'epoch': 1} {'type': 'loss', 'content': 0.19655051827430725, 'timestamp': '2025-10-01 04:15:58.872368', 'step': 2734, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:58.903027', 'step': 2734, 'epoch': 1} {'type': 'loss', 'content': 0.20269259810447693, 'timestamp': '2025-10-01 04:15:58.905172', 'step': 2735, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:58.935984', 'step': 2735, 'epoch': 1} {'type': 'loss', 'content': 0.180007204413414, 'timestamp': '2025-10-01 04:15:58.959719', 'step': 2736, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:15:58.990682', 'step': 2736, 'epoch': 1} {'type': 'loss', 'content': 0.10366319119930267, 'timestamp': '2025-10-01 04:15:58.992977', 'step': 2737, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:59.023172', 'step': 2737, 'epoch': 1} {'type': 'loss', 'content': 0.17985861003398895, 'timestamp': '2025-10-01 04:15:59.025522', 'step': 2738, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:59.056273', 'step': 2738, 'epoch': 1} {'type': 'loss', 'content': 0.12835711240768433, 'timestamp': '2025-10-01 04:15:59.058404', 'step': 2739, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:59.088854', 'step': 2739, 'epoch': 1} {'type': 'loss', 'content': 0.17887134850025177, 'timestamp': '2025-10-01 04:15:59.112498', 'step': 2740, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:15:59.143121', 'step': 2740, 'epoch': 1} {'type': 'loss', 'content': 0.1637362241744995, 'timestamp': '2025-10-01 04:15:59.145251', 'step': 2741, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:59.176205', 'step': 2741, 'epoch': 1} {'type': 'loss', 'content': 0.12041384726762772, 'timestamp': '2025-10-01 04:15:59.178317', 'step': 2742, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:59.220438', 'step': 2742, 'epoch': 1} {'type': 'loss', 'content': 0.13227489590644836, 'timestamp': '2025-10-01 04:15:59.222631', 'step': 2743, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:15:59.253548', 'step': 2743, 'epoch': 1} {'type': 'loss', 'content': 0.16183805465698242, 'timestamp': '2025-10-01 04:15:59.277139', 'step': 2744, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:59.309377', 'step': 2744, 'epoch': 1} {'type': 'loss', 'content': 0.2273915559053421, 'timestamp': '2025-10-01 04:15:59.326548', 'step': 2745, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:59.358053', 'step': 2745, 'epoch': 1} {'type': 'loss', 'content': 0.14139175415039062, 'timestamp': '2025-10-01 04:15:59.360222', 'step': 2746, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:15:59.390960', 'step': 2746, 'epoch': 1} {'type': 'loss', 'content': 0.14309430122375488, 'timestamp': '2025-10-01 04:15:59.393244', 'step': 2747, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:59.423378', 'step': 2747, 'epoch': 1} {'type': 'loss', 'content': 0.15378165245056152, 'timestamp': '2025-10-01 04:15:59.448331', 'step': 2748, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-10-01 04:15:59.479735', 'step': 2748, 'epoch': 1} {'type': 'loss', 'content': 0.18870897591114044, 'timestamp': '2025-10-01 04:15:59.484247', 'step': 2749, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:59.523823', 'step': 2749, 'epoch': 1} {'type': 'loss', 'content': 0.12114790081977844, 'timestamp': '2025-10-01 04:15:59.525978', 'step': 2750, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:59.556107', 'step': 2750, 'epoch': 1} {'type': 'loss', 'content': 0.22417502105236053, 'timestamp': '2025-10-01 04:15:59.558226', 'step': 2751, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:15:59.601845', 'step': 2751, 'epoch': 1} {'type': 'loss', 'content': 0.12628071010112762, 'timestamp': '2025-10-01 04:15:59.625563', 'step': 2752, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:15:59.654935', 'step': 2752, 'epoch': 1} {'type': 'loss', 'content': 0.21090488135814667, 'timestamp': '2025-10-01 04:15:59.657316', 'step': 2753, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:59.687686', 'step': 2753, 'epoch': 1} {'type': 'loss', 'content': 0.24073848128318787, 'timestamp': '2025-10-01 04:15:59.689801', 'step': 2754, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:59.720424', 'step': 2754, 'epoch': 1} {'type': 'loss', 'content': 0.21726509928703308, 'timestamp': '2025-10-01 04:15:59.722569', 'step': 2755, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:59.752750', 'step': 2755, 'epoch': 1} {'type': 'loss', 'content': 0.12954148650169373, 'timestamp': '2025-10-01 04:15:59.776413', 'step': 2756, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:15:59.806692', 'step': 2756, 'epoch': 1} {'type': 'loss', 'content': 0.18226464092731476, 'timestamp': '2025-10-01 04:15:59.808883', 'step': 2757, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:59.839813', 'step': 2757, 'epoch': 1} {'type': 'loss', 'content': 0.19687794148921967, 'timestamp': '2025-10-01 04:15:59.842335', 'step': 2758, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:15:59.872603', 'step': 2758, 'epoch': 1} {'type': 'loss', 'content': 0.16303643584251404, 'timestamp': '2025-10-01 04:15:59.874902', 'step': 2759, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:15:59.905477', 'step': 2759, 'epoch': 1} {'type': 'loss', 'content': 0.09662824869155884, 'timestamp': '2025-10-01 04:15:59.929313', 'step': 2760, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:15:59.960526', 'step': 2760, 'epoch': 1} {'type': 'loss', 'content': 0.15440526604652405, 'timestamp': '2025-10-01 04:15:59.963043', 'step': 2761, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:15:59.994072', 'step': 2761, 'epoch': 1} {'type': 'loss', 'content': 0.13968560099601746, 'timestamp': '2025-10-01 04:15:59.996738', 'step': 2762, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:16:00.027466', 'step': 2762, 'epoch': 1} {'type': 'loss', 'content': 0.15451765060424805, 'timestamp': '2025-10-01 04:16:00.029982', 'step': 2763, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:16:00.060422', 'step': 2763, 'epoch': 1} {'type': 'loss', 'content': 0.1998492181301117, 'timestamp': '2025-10-01 04:16:00.084569', 'step': 2764, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:00.114908', 'step': 2764, 'epoch': 1} {'type': 'loss', 'content': 0.1633787602186203, 'timestamp': '2025-10-01 04:16:00.117392', 'step': 2765, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:16:00.148197', 'step': 2765, 'epoch': 1} {'type': 'loss', 'content': 0.19822931289672852, 'timestamp': '2025-10-01 04:16:00.164311', 'step': 2766, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:16:00.196563', 'step': 2766, 'epoch': 1} {'type': 'loss', 'content': 0.17692944407463074, 'timestamp': '2025-10-01 04:16:00.199363', 'step': 2767, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:00.229955', 'step': 2767, 'epoch': 1} {'type': 'loss', 'content': 0.18000417947769165, 'timestamp': '2025-10-01 04:16:00.265683', 'step': 2768, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:00.297042', 'step': 2768, 'epoch': 1} {'type': 'loss', 'content': 0.15006837248802185, 'timestamp': '2025-10-01 04:16:00.299366', 'step': 2769, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:00.331320', 'step': 2769, 'epoch': 1} {'type': 'loss', 'content': 0.21252959966659546, 'timestamp': '2025-10-01 04:16:00.333914', 'step': 2770, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:00.366208', 'step': 2770, 'epoch': 1} {'type': 'loss', 'content': 0.10730347782373428, 'timestamp': '2025-10-01 04:16:00.368546', 'step': 2771, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:00.400017', 'step': 2771, 'epoch': 1} {'type': 'loss', 'content': 0.23028738796710968, 'timestamp': '2025-10-01 04:16:00.423630', 'step': 2772, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:16:00.453917', 'step': 2772, 'epoch': 1} {'type': 'loss', 'content': 0.0808556005358696, 'timestamp': '2025-10-01 04:16:00.456348', 'step': 2773, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:00.488044', 'step': 2773, 'epoch': 1} {'type': 'loss', 'content': 0.21605144441127777, 'timestamp': '2025-10-01 04:16:00.490629', 'step': 2774, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:16:00.521116', 'step': 2774, 'epoch': 1} {'type': 'loss', 'content': 0.1610497385263443, 'timestamp': '2025-10-01 04:16:00.523938', 'step': 2775, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:00.554572', 'step': 2775, 'epoch': 1} {'type': 'loss', 'content': 0.17737337946891785, 'timestamp': '2025-10-01 04:16:00.578495', 'step': 2776, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:16:00.615037', 'step': 2776, 'epoch': 1} {'type': 'loss', 'content': 0.15441715717315674, 'timestamp': '2025-10-01 04:16:00.629791', 'step': 2777, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:00.660807', 'step': 2777, 'epoch': 1} {'type': 'loss', 'content': 0.13356001675128937, 'timestamp': '2025-10-01 04:16:00.663352', 'step': 2778, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:00.694360', 'step': 2778, 'epoch': 1} {'type': 'loss', 'content': 0.11121907085180283, 'timestamp': '2025-10-01 04:16:00.697004', 'step': 2779, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:00.727596', 'step': 2779, 'epoch': 1} {'type': 'loss', 'content': 0.2380479872226715, 'timestamp': '2025-10-01 04:16:00.760231', 'step': 2780, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:00.791283', 'step': 2780, 'epoch': 1} {'type': 'loss', 'content': 0.18101252615451813, 'timestamp': '2025-10-01 04:16:00.794942', 'step': 2781, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:00.826575', 'step': 2781, 'epoch': 1} {'type': 'loss', 'content': 0.1602148860692978, 'timestamp': '2025-10-01 04:16:00.828979', 'step': 2782, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:00.861070', 'step': 2782, 'epoch': 1} {'type': 'loss', 'content': 0.19867607951164246, 'timestamp': '2025-10-01 04:16:00.863247', 'step': 2783, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:00.893446', 'step': 2783, 'epoch': 1} {'type': 'loss', 'content': 0.21163882315158844, 'timestamp': '2025-10-01 04:16:00.917875', 'step': 2784, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:00.948092', 'step': 2784, 'epoch': 1} {'type': 'loss', 'content': 0.10507997125387192, 'timestamp': '2025-10-01 04:16:00.950912', 'step': 2785, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:00.981841', 'step': 2785, 'epoch': 1} {'type': 'loss', 'content': 0.243912473320961, 'timestamp': '2025-10-01 04:16:00.984030', 'step': 2786, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:01.014463', 'step': 2786, 'epoch': 1} {'type': 'loss', 'content': 0.1645420789718628, 'timestamp': '2025-10-01 04:16:01.017170', 'step': 2787, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:01.046869', 'step': 2787, 'epoch': 1} {'type': 'loss', 'content': 0.17424288392066956, 'timestamp': '2025-10-01 04:16:01.078158', 'step': 2788, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:16:01.108613', 'step': 2788, 'epoch': 1} {'type': 'loss', 'content': 0.13371926546096802, 'timestamp': '2025-10-01 04:16:01.110513', 'step': 2789, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:01.140445', 'step': 2789, 'epoch': 1} {'type': 'loss', 'content': 0.20456154644489288, 'timestamp': '2025-10-01 04:16:01.142636', 'step': 2790, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:01.172534', 'step': 2790, 'epoch': 1} {'type': 'loss', 'content': 0.13236360251903534, 'timestamp': '2025-10-01 04:16:01.174470', 'step': 2791, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:16:01.203806', 'step': 2791, 'epoch': 1} {'type': 'loss', 'content': 0.34614723920822144, 'timestamp': '2025-10-01 04:16:01.227519', 'step': 2792, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:16:01.257674', 'step': 2792, 'epoch': 1} {'type': 'loss', 'content': 0.15037232637405396, 'timestamp': '2025-10-01 04:16:01.259663', 'step': 2793, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:16:01.290072', 'step': 2793, 'epoch': 1} {'type': 'loss', 'content': 0.1718083620071411, 'timestamp': '2025-10-01 04:16:01.292737', 'step': 2794, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:01.322763', 'step': 2794, 'epoch': 1} {'type': 'loss', 'content': 0.1529848426580429, 'timestamp': '2025-10-01 04:16:01.324897', 'step': 2795, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:01.356286', 'step': 2795, 'epoch': 1} {'type': 'loss', 'content': 0.14439789950847626, 'timestamp': '2025-10-01 04:16:01.380054', 'step': 2796, 'epoch': 1} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 949202279808}], 'timestamp': '2025-10-01 04:16:10.821700', 'step': 2796, 'epoch': 1} {'type': 'pplx', 'content': 9250.338103703447, 'timestamp': '2025-10-01 04:16:10.824551', 'step': 2796, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:16:10.855279', 'step': 2796, 'epoch': 1} {'type': 'loss', 'content': 0.26567021012306213, 'timestamp': '2025-10-01 04:16:10.857517', 'step': 2797, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:16:10.891149', 'step': 2797, 'epoch': 1} {'type': 'loss', 'content': 0.19776852428913116, 'timestamp': '2025-10-01 04:16:10.895373', 'step': 2798, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:16:10.955333', 'step': 2798, 'epoch': 1} {'type': 'loss', 'content': 0.09404866397380829, 'timestamp': '2025-10-01 04:16:10.957550', 'step': 2799, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:10.990408', 'step': 2799, 'epoch': 1} {'type': 'loss', 'content': 0.15954922139644623, 'timestamp': '2025-10-01 04:16:11.013765', 'step': 2800, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:16:11.044903', 'step': 2800, 'epoch': 1} {'type': 'loss', 'content': 0.11718804389238358, 'timestamp': '2025-10-01 04:16:11.047015', 'step': 2801, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:11.081397', 'step': 2801, 'epoch': 1} {'type': 'loss', 'content': 0.135531485080719, 'timestamp': '2025-10-01 04:16:11.083509', 'step': 2802, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:16:11.122408', 'step': 2802, 'epoch': 1} {'type': 'loss', 'content': 0.10141874104738235, 'timestamp': '2025-10-01 04:16:11.124752', 'step': 2803, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:11.157719', 'step': 2803, 'epoch': 1} {'type': 'loss', 'content': 0.1810673326253891, 'timestamp': '2025-10-01 04:16:11.181590', 'step': 2804, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:11.217424', 'step': 2804, 'epoch': 1} {'type': 'loss', 'content': 0.16741766035556793, 'timestamp': '2025-10-01 04:16:11.219666', 'step': 2805, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:11.262239', 'step': 2805, 'epoch': 1} {'type': 'loss', 'content': 0.12055950611829758, 'timestamp': '2025-10-01 04:16:11.264703', 'step': 2806, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:16:11.295222', 'step': 2806, 'epoch': 1} {'type': 'loss', 'content': 0.253801554441452, 'timestamp': '2025-10-01 04:16:11.300903', 'step': 2807, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:11.331505', 'step': 2807, 'epoch': 1} {'type': 'loss', 'content': 0.21141748130321503, 'timestamp': '2025-10-01 04:16:11.355395', 'step': 2808, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:16:11.385939', 'step': 2808, 'epoch': 1} {'type': 'loss', 'content': 0.375253826379776, 'timestamp': '2025-10-01 04:16:11.388154', 'step': 2809, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:11.418029', 'step': 2809, 'epoch': 1} {'type': 'loss', 'content': 0.1597864031791687, 'timestamp': '2025-10-01 04:16:11.420248', 'step': 2810, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:11.452136', 'step': 2810, 'epoch': 1} {'type': 'loss', 'content': 0.11032981425523758, 'timestamp': '2025-10-01 04:16:11.454400', 'step': 2811, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:11.485299', 'step': 2811, 'epoch': 1} {'type': 'loss', 'content': 0.11309630423784256, 'timestamp': '2025-10-01 04:16:11.509088', 'step': 2812, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:16:11.546182', 'step': 2812, 'epoch': 1} {'type': 'loss', 'content': 0.1619274616241455, 'timestamp': '2025-10-01 04:16:11.548746', 'step': 2813, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:11.579245', 'step': 2813, 'epoch': 1} {'type': 'loss', 'content': 0.15075364708900452, 'timestamp': '2025-10-01 04:16:11.582248', 'step': 2814, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:11.611792', 'step': 2814, 'epoch': 1} {'type': 'loss', 'content': 0.1970561444759369, 'timestamp': '2025-10-01 04:16:11.613919', 'step': 2815, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:11.645031', 'step': 2815, 'epoch': 1} {'type': 'loss', 'content': 0.17874419689178467, 'timestamp': '2025-10-01 04:16:11.680746', 'step': 2816, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:16:11.712834', 'step': 2816, 'epoch': 1} {'type': 'loss', 'content': 0.12614792585372925, 'timestamp': '2025-10-01 04:16:11.714889', 'step': 2817, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:11.748242', 'step': 2817, 'epoch': 1} {'type': 'loss', 'content': 0.1491018831729889, 'timestamp': '2025-10-01 04:16:11.750647', 'step': 2818, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:16:11.783872', 'step': 2818, 'epoch': 1} {'type': 'loss', 'content': 0.2943778932094574, 'timestamp': '2025-10-01 04:16:11.786364', 'step': 2819, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:11.817325', 'step': 2819, 'epoch': 1} {'type': 'loss', 'content': 0.2089802622795105, 'timestamp': '2025-10-01 04:16:11.841138', 'step': 2820, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:11.872421', 'step': 2820, 'epoch': 1} {'type': 'loss', 'content': 0.1728626787662506, 'timestamp': '2025-10-01 04:16:11.874794', 'step': 2821, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:11.918367', 'step': 2821, 'epoch': 1} {'type': 'loss', 'content': 0.247595876455307, 'timestamp': '2025-10-01 04:16:11.920678', 'step': 2822, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:16:11.951478', 'step': 2822, 'epoch': 1} {'type': 'loss', 'content': 0.17748135328292847, 'timestamp': '2025-10-01 04:16:11.953905', 'step': 2823, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:11.992271', 'step': 2823, 'epoch': 1} {'type': 'loss', 'content': 0.08866993337869644, 'timestamp': '2025-10-01 04:16:12.016031', 'step': 2824, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:12.048873', 'step': 2824, 'epoch': 1} {'type': 'loss', 'content': 0.14153917133808136, 'timestamp': '2025-10-01 04:16:12.050933', 'step': 2825, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:16:12.087972', 'step': 2825, 'epoch': 1} {'type': 'loss', 'content': 0.18100391328334808, 'timestamp': '2025-10-01 04:16:12.090749', 'step': 2826, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:12.125583', 'step': 2826, 'epoch': 1} {'type': 'loss', 'content': 0.1675315946340561, 'timestamp': '2025-10-01 04:16:12.127727', 'step': 2827, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:16:12.165240', 'step': 2827, 'epoch': 1} {'type': 'loss', 'content': 0.08925415575504303, 'timestamp': '2025-10-01 04:16:12.189027', 'step': 2828, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:12.219711', 'step': 2828, 'epoch': 1} {'type': 'loss', 'content': 0.1228296235203743, 'timestamp': '2025-10-01 04:16:12.221921', 'step': 2829, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:12.254552', 'step': 2829, 'epoch': 1} {'type': 'loss', 'content': 0.28391364216804504, 'timestamp': '2025-10-01 04:16:12.258179', 'step': 2830, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:12.293057', 'step': 2830, 'epoch': 1} {'type': 'loss', 'content': 0.18258501589298248, 'timestamp': '2025-10-01 04:16:12.295109', 'step': 2831, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:12.331688', 'step': 2831, 'epoch': 1} {'type': 'loss', 'content': 0.15407094359397888, 'timestamp': '2025-10-01 04:16:12.355757', 'step': 2832, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:12.390054', 'step': 2832, 'epoch': 1} {'type': 'loss', 'content': 0.12613146007061005, 'timestamp': '2025-10-01 04:16:12.395848', 'step': 2833, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:12.427442', 'step': 2833, 'epoch': 1} {'type': 'loss', 'content': 0.10976289957761765, 'timestamp': '2025-10-01 04:16:12.429995', 'step': 2834, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:16:12.462125', 'step': 2834, 'epoch': 1} {'type': 'loss', 'content': 0.1106959730386734, 'timestamp': '2025-10-01 04:16:12.466058', 'step': 2835, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:12.506675', 'step': 2835, 'epoch': 1} {'type': 'loss', 'content': 0.1614813357591629, 'timestamp': '2025-10-01 04:16:12.530791', 'step': 2836, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:12.564422', 'step': 2836, 'epoch': 1} {'type': 'loss', 'content': 0.20871269702911377, 'timestamp': '2025-10-01 04:16:12.566779', 'step': 2837, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:12.598579', 'step': 2837, 'epoch': 1} {'type': 'loss', 'content': 0.20627270638942719, 'timestamp': '2025-10-01 04:16:12.600633', 'step': 2838, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:12.641454', 'step': 2838, 'epoch': 1} {'type': 'loss', 'content': 0.12885090708732605, 'timestamp': '2025-10-01 04:16:12.643583', 'step': 2839, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:12.676195', 'step': 2839, 'epoch': 1} {'type': 'loss', 'content': 0.16802556812763214, 'timestamp': '2025-10-01 04:16:12.699981', 'step': 2840, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:16:12.730358', 'step': 2840, 'epoch': 1} {'type': 'loss', 'content': 0.3278319835662842, 'timestamp': '2025-10-01 04:16:12.732589', 'step': 2841, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:12.761854', 'step': 2841, 'epoch': 1} {'type': 'loss', 'content': 0.15078847110271454, 'timestamp': '2025-10-01 04:16:12.764134', 'step': 2842, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:16:12.802861', 'step': 2842, 'epoch': 1} {'type': 'loss', 'content': 0.14621563255786896, 'timestamp': '2025-10-01 04:16:12.805302', 'step': 2843, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:16:12.835292', 'step': 2843, 'epoch': 1} {'type': 'loss', 'content': 0.16195224225521088, 'timestamp': '2025-10-01 04:16:12.858953', 'step': 2844, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:12.889579', 'step': 2844, 'epoch': 1} {'type': 'loss', 'content': 0.1777282953262329, 'timestamp': '2025-10-01 04:16:12.891501', 'step': 2845, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:12.926496', 'step': 2845, 'epoch': 1} {'type': 'loss', 'content': 0.1630794107913971, 'timestamp': '2025-10-01 04:16:12.928751', 'step': 2846, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:12.959528', 'step': 2846, 'epoch': 1} {'type': 'loss', 'content': 0.16835731267929077, 'timestamp': '2025-10-01 04:16:12.964269', 'step': 2847, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:12.995114', 'step': 2847, 'epoch': 1} {'type': 'loss', 'content': 0.1307685226202011, 'timestamp': '2025-10-01 04:16:13.018681', 'step': 2848, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:13.049276', 'step': 2848, 'epoch': 1} {'type': 'loss', 'content': 0.1169205754995346, 'timestamp': '2025-10-01 04:16:13.051535', 'step': 2849, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:13.089230', 'step': 2849, 'epoch': 1} {'type': 'loss', 'content': 0.12953968346118927, 'timestamp': '2025-10-01 04:16:13.091452', 'step': 2850, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:13.121800', 'step': 2850, 'epoch': 1} {'type': 'loss', 'content': 0.13008497655391693, 'timestamp': '2025-10-01 04:16:13.124028', 'step': 2851, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:16:13.153736', 'step': 2851, 'epoch': 1} {'type': 'loss', 'content': 0.1427156627178192, 'timestamp': '2025-10-01 04:16:13.177498', 'step': 2852, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:16:13.207801', 'step': 2852, 'epoch': 1} {'type': 'loss', 'content': 0.1459639072418213, 'timestamp': '2025-10-01 04:16:13.210116', 'step': 2853, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:13.239797', 'step': 2853, 'epoch': 1} {'type': 'loss', 'content': 0.18767936527729034, 'timestamp': '2025-10-01 04:16:13.241720', 'step': 2854, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:13.272400', 'step': 2854, 'epoch': 1} {'type': 'loss', 'content': 0.19719161093235016, 'timestamp': '2025-10-01 04:16:13.274558', 'step': 2855, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:13.305182', 'step': 2855, 'epoch': 1} {'type': 'loss', 'content': 0.15432536602020264, 'timestamp': '2025-10-01 04:16:13.328703', 'step': 2856, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:16:13.358704', 'step': 2856, 'epoch': 1} {'type': 'loss', 'content': 0.25332406163215637, 'timestamp': '2025-10-01 04:16:13.361958', 'step': 2857, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:16:13.392237', 'step': 2857, 'epoch': 1} {'type': 'loss', 'content': 0.14248153567314148, 'timestamp': '2025-10-01 04:16:13.394254', 'step': 2858, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:16:13.424007', 'step': 2858, 'epoch': 1} {'type': 'loss', 'content': 0.28723058104515076, 'timestamp': '2025-10-01 04:16:13.426195', 'step': 2859, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:13.457042', 'step': 2859, 'epoch': 1} {'type': 'loss', 'content': 0.20133090019226074, 'timestamp': '2025-10-01 04:16:13.480705', 'step': 2860, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:13.511749', 'step': 2860, 'epoch': 1} {'type': 'loss', 'content': 0.16286560893058777, 'timestamp': '2025-10-01 04:16:13.514037', 'step': 2861, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:13.545364', 'step': 2861, 'epoch': 1} {'type': 'loss', 'content': 0.19124233722686768, 'timestamp': '2025-10-01 04:16:13.547556', 'step': 2862, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:13.577798', 'step': 2862, 'epoch': 1} {'type': 'loss', 'content': 0.07154480367898941, 'timestamp': '2025-10-01 04:16:13.580023', 'step': 2863, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:13.611764', 'step': 2863, 'epoch': 1} {'type': 'loss', 'content': 0.11940307915210724, 'timestamp': '2025-10-01 04:16:13.635774', 'step': 2864, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:13.665900', 'step': 2864, 'epoch': 1} {'type': 'loss', 'content': 0.21318106353282928, 'timestamp': '2025-10-01 04:16:13.667927', 'step': 2865, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:16:13.700055', 'step': 2865, 'epoch': 1} {'type': 'loss', 'content': 0.1706777960062027, 'timestamp': '2025-10-01 04:16:13.702238', 'step': 2866, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:13.732700', 'step': 2866, 'epoch': 1} {'type': 'loss', 'content': 0.20168136060237885, 'timestamp': '2025-10-01 04:16:13.734975', 'step': 2867, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:13.765883', 'step': 2867, 'epoch': 1} {'type': 'loss', 'content': 0.1929386556148529, 'timestamp': '2025-10-01 04:16:13.789568', 'step': 2868, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:16:13.820244', 'step': 2868, 'epoch': 1} {'type': 'loss', 'content': 0.19267237186431885, 'timestamp': '2025-10-01 04:16:13.822356', 'step': 2869, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:13.852793', 'step': 2869, 'epoch': 1} {'type': 'loss', 'content': 0.1143789291381836, 'timestamp': '2025-10-01 04:16:13.855100', 'step': 2870, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:13.885794', 'step': 2870, 'epoch': 1} {'type': 'loss', 'content': 0.15283408761024475, 'timestamp': '2025-10-01 04:16:13.888264', 'step': 2871, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:13.919862', 'step': 2871, 'epoch': 1} {'type': 'loss', 'content': 0.09776512533426285, 'timestamp': '2025-10-01 04:16:13.943484', 'step': 2872, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:13.974125', 'step': 2872, 'epoch': 1} {'type': 'loss', 'content': 0.21647396683692932, 'timestamp': '2025-10-01 04:16:13.976326', 'step': 2873, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:14.007680', 'step': 2873, 'epoch': 1} {'type': 'loss', 'content': 0.09562642127275467, 'timestamp': '2025-10-01 04:16:14.009919', 'step': 2874, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:16:14.039892', 'step': 2874, 'epoch': 1} {'type': 'loss', 'content': 0.1339494287967682, 'timestamp': '2025-10-01 04:16:14.041987', 'step': 2875, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:16:14.072178', 'step': 2875, 'epoch': 1} {'type': 'loss', 'content': 0.2385515719652176, 'timestamp': '2025-10-01 04:16:14.095588', 'step': 2876, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:14.125860', 'step': 2876, 'epoch': 1} {'type': 'loss', 'content': 0.22751548886299133, 'timestamp': '2025-10-01 04:16:14.128211', 'step': 2877, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:16:14.159015', 'step': 2877, 'epoch': 1} {'type': 'loss', 'content': 0.1764056533575058, 'timestamp': '2025-10-01 04:16:14.161321', 'step': 2878, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:16:14.193828', 'step': 2878, 'epoch': 1} {'type': 'loss', 'content': 0.16213923692703247, 'timestamp': '2025-10-01 04:16:14.196139', 'step': 2879, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:16:14.226871', 'step': 2879, 'epoch': 1} {'type': 'loss', 'content': 0.19517146050930023, 'timestamp': '2025-10-01 04:16:14.250501', 'step': 2880, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:16:14.283268', 'step': 2880, 'epoch': 1} {'type': 'loss', 'content': 0.18586893379688263, 'timestamp': '2025-10-01 04:16:14.285424', 'step': 2881, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:14.317376', 'step': 2881, 'epoch': 1} {'type': 'loss', 'content': 0.21867039799690247, 'timestamp': '2025-10-01 04:16:14.319443', 'step': 2882, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:16:14.350065', 'step': 2882, 'epoch': 1} {'type': 'loss', 'content': 0.15134809911251068, 'timestamp': '2025-10-01 04:16:14.351990', 'step': 2883, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:16:14.382165', 'step': 2883, 'epoch': 1} {'type': 'loss', 'content': 0.24245937168598175, 'timestamp': '2025-10-01 04:16:14.405959', 'step': 2884, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:16:14.437607', 'step': 2884, 'epoch': 1} {'type': 'loss', 'content': 0.12728539109230042, 'timestamp': '2025-10-01 04:16:14.439781', 'step': 2885, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:14.470192', 'step': 2885, 'epoch': 1} {'type': 'loss', 'content': 0.1192231997847557, 'timestamp': '2025-10-01 04:16:14.472445', 'step': 2886, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:14.503500', 'step': 2886, 'epoch': 1} {'type': 'loss', 'content': 0.15449215471744537, 'timestamp': '2025-10-01 04:16:14.505744', 'step': 2887, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:14.535713', 'step': 2887, 'epoch': 1} {'type': 'loss', 'content': 0.11772879213094711, 'timestamp': '2025-10-01 04:16:14.559538', 'step': 2888, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:16:14.589586', 'step': 2888, 'epoch': 1} {'type': 'loss', 'content': 0.18725742399692535, 'timestamp': '2025-10-01 04:16:14.591901', 'step': 2889, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:14.621947', 'step': 2889, 'epoch': 1} {'type': 'loss', 'content': 0.14696691930294037, 'timestamp': '2025-10-01 04:16:14.624396', 'step': 2890, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:16:14.654783', 'step': 2890, 'epoch': 1} {'type': 'loss', 'content': 0.10393564403057098, 'timestamp': '2025-10-01 04:16:14.656760', 'step': 2891, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:16:14.687150', 'step': 2891, 'epoch': 1} {'type': 'loss', 'content': 0.12649281322956085, 'timestamp': '2025-10-01 04:16:14.710934', 'step': 2892, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:14.741683', 'step': 2892, 'epoch': 1} {'type': 'loss', 'content': 0.1161552295088768, 'timestamp': '2025-10-01 04:16:14.743914', 'step': 2893, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:14.773845', 'step': 2893, 'epoch': 1} {'type': 'loss', 'content': 0.15674668550491333, 'timestamp': '2025-10-01 04:16:14.776776', 'step': 2894, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:14.807099', 'step': 2894, 'epoch': 1} {'type': 'loss', 'content': 0.11028577387332916, 'timestamp': '2025-10-01 04:16:14.809050', 'step': 2895, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-10-01 04:16:14.839980', 'step': 2895, 'epoch': 1} {'type': 'loss', 'content': 0.18008480966091156, 'timestamp': '2025-10-01 04:16:14.865743', 'step': 2896, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:14.896335', 'step': 2896, 'epoch': 1} {'type': 'loss', 'content': 0.06038673222064972, 'timestamp': '2025-10-01 04:16:14.898331', 'step': 2897, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:14.927791', 'step': 2897, 'epoch': 1} {'type': 'loss', 'content': 0.17762857675552368, 'timestamp': '2025-10-01 04:16:14.930015', 'step': 2898, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:14.960228', 'step': 2898, 'epoch': 1} {'type': 'loss', 'content': 0.218806192278862, 'timestamp': '2025-10-01 04:16:14.962573', 'step': 2899, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:16:14.995866', 'step': 2899, 'epoch': 1} {'type': 'loss', 'content': 0.16848637163639069, 'timestamp': '2025-10-01 04:16:15.020091', 'step': 2900, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:15.050586', 'step': 2900, 'epoch': 1} {'type': 'loss', 'content': 0.1354353427886963, 'timestamp': '2025-10-01 04:16:15.052859', 'step': 2901, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:15.083325', 'step': 2901, 'epoch': 1} {'type': 'loss', 'content': 0.14162902534008026, 'timestamp': '2025-10-01 04:16:15.085531', 'step': 2902, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:15.115523', 'step': 2902, 'epoch': 1} {'type': 'loss', 'content': 0.21504800021648407, 'timestamp': '2025-10-01 04:16:15.117886', 'step': 2903, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:16:15.158099', 'step': 2903, 'epoch': 1} {'type': 'loss', 'content': 0.08555194735527039, 'timestamp': '2025-10-01 04:16:15.182316', 'step': 2904, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:16:15.214034', 'step': 2904, 'epoch': 1} {'type': 'loss', 'content': 0.19326816499233246, 'timestamp': '2025-10-01 04:16:15.215964', 'step': 2905, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:15.247290', 'step': 2905, 'epoch': 1} {'type': 'loss', 'content': 0.1677582859992981, 'timestamp': '2025-10-01 04:16:15.250122', 'step': 2906, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:16:15.282841', 'step': 2906, 'epoch': 1} {'type': 'loss', 'content': 0.11622229963541031, 'timestamp': '2025-10-01 04:16:15.285350', 'step': 2907, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:16:15.317227', 'step': 2907, 'epoch': 1} {'type': 'loss', 'content': 0.1983182728290558, 'timestamp': '2025-10-01 04:16:15.340930', 'step': 2908, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:15.373460', 'step': 2908, 'epoch': 1} {'type': 'loss', 'content': 0.20452716946601868, 'timestamp': '2025-10-01 04:16:15.375624', 'step': 2909, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:16:15.408242', 'step': 2909, 'epoch': 1} {'type': 'loss', 'content': 0.12739020586013794, 'timestamp': '2025-10-01 04:16:15.410442', 'step': 2910, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:16:15.442717', 'step': 2910, 'epoch': 1} {'type': 'loss', 'content': 0.1598951369524002, 'timestamp': '2025-10-01 04:16:15.444914', 'step': 2911, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:15.475561', 'step': 2911, 'epoch': 1} {'type': 'loss', 'content': 0.20300546288490295, 'timestamp': '2025-10-01 04:16:15.499204', 'step': 2912, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:15.529582', 'step': 2912, 'epoch': 1} {'type': 'loss', 'content': 0.15578314661979675, 'timestamp': '2025-10-01 04:16:15.542310', 'step': 2913, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:15.572878', 'step': 2913, 'epoch': 1} {'type': 'loss', 'content': 0.18694515526294708, 'timestamp': '2025-10-01 04:16:15.574989', 'step': 2914, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:16:15.607522', 'step': 2914, 'epoch': 1} {'type': 'loss', 'content': 0.1624855101108551, 'timestamp': '2025-10-01 04:16:15.609882', 'step': 2915, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:15.641091', 'step': 2915, 'epoch': 1} {'type': 'loss', 'content': 0.17270159721374512, 'timestamp': '2025-10-01 04:16:15.664757', 'step': 2916, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:16:15.696575', 'step': 2916, 'epoch': 1} {'type': 'loss', 'content': 0.12918856739997864, 'timestamp': '2025-10-01 04:16:15.711045', 'step': 2917, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:15.741086', 'step': 2917, 'epoch': 1} {'type': 'loss', 'content': 0.19328434765338898, 'timestamp': '2025-10-01 04:16:15.743599', 'step': 2918, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:15.774513', 'step': 2918, 'epoch': 1} {'type': 'loss', 'content': 0.17822426557540894, 'timestamp': '2025-10-01 04:16:15.776680', 'step': 2919, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:16:15.808128', 'step': 2919, 'epoch': 1} {'type': 'loss', 'content': 0.20472584664821625, 'timestamp': '2025-10-01 04:16:15.831760', 'step': 2920, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:15.862440', 'step': 2920, 'epoch': 1} {'type': 'loss', 'content': 0.17833155393600464, 'timestamp': '2025-10-01 04:16:15.864708', 'step': 2921, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:15.895742', 'step': 2921, 'epoch': 1} {'type': 'loss', 'content': 0.150935560464859, 'timestamp': '2025-10-01 04:16:15.898282', 'step': 2922, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:16:15.928336', 'step': 2922, 'epoch': 1} {'type': 'loss', 'content': 0.18908128142356873, 'timestamp': '2025-10-01 04:16:15.930391', 'step': 2923, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:16:15.968345', 'step': 2923, 'epoch': 1} {'type': 'loss', 'content': 0.19067876040935516, 'timestamp': '2025-10-01 04:16:15.992016', 'step': 2924, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:16:16.024541', 'step': 2924, 'epoch': 1} {'type': 'loss', 'content': 0.22786635160446167, 'timestamp': '2025-10-01 04:16:16.026601', 'step': 2925, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:16:16.057513', 'step': 2925, 'epoch': 1} {'type': 'loss', 'content': 0.18290719389915466, 'timestamp': '2025-10-01 04:16:16.060376', 'step': 2926, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:16.091857', 'step': 2926, 'epoch': 1} {'type': 'loss', 'content': 0.09774445742368698, 'timestamp': '2025-10-01 04:16:16.094163', 'step': 2927, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:16.126045', 'step': 2927, 'epoch': 1} {'type': 'loss', 'content': 0.17905884981155396, 'timestamp': '2025-10-01 04:16:16.149811', 'step': 2928, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:16.180066', 'step': 2928, 'epoch': 1} {'type': 'loss', 'content': 0.22519874572753906, 'timestamp': '2025-10-01 04:16:16.182185', 'step': 2929, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:16.212054', 'step': 2929, 'epoch': 1} {'type': 'loss', 'content': 0.17305488884449005, 'timestamp': '2025-10-01 04:16:16.214260', 'step': 2930, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:16.245127', 'step': 2930, 'epoch': 1} {'type': 'loss', 'content': 0.16165220737457275, 'timestamp': '2025-10-01 04:16:16.249247', 'step': 2931, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:16.281445', 'step': 2931, 'epoch': 1} {'type': 'loss', 'content': 0.20048339664936066, 'timestamp': '2025-10-01 04:16:16.305633', 'step': 2932, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:16.337431', 'step': 2932, 'epoch': 1} {'type': 'loss', 'content': 0.16963015496730804, 'timestamp': '2025-10-01 04:16:16.339652', 'step': 2933, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:16.370750', 'step': 2933, 'epoch': 1} {'type': 'loss', 'content': 0.25589340925216675, 'timestamp': '2025-10-01 04:16:16.373105', 'step': 2934, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:16.403961', 'step': 2934, 'epoch': 1} {'type': 'loss', 'content': 0.1484137773513794, 'timestamp': '2025-10-01 04:16:16.406135', 'step': 2935, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-10-01 04:16:16.436392', 'step': 2935, 'epoch': 1} {'type': 'loss', 'content': 0.22656558454036713, 'timestamp': '2025-10-01 04:16:16.462126', 'step': 2936, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:16:16.492708', 'step': 2936, 'epoch': 1} {'type': 'loss', 'content': 0.19498439133167267, 'timestamp': '2025-10-01 04:16:16.494953', 'step': 2937, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:16:16.526120', 'step': 2937, 'epoch': 1} {'type': 'loss', 'content': 0.23949649930000305, 'timestamp': '2025-10-01 04:16:16.528561', 'step': 2938, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:16.558884', 'step': 2938, 'epoch': 1} {'type': 'loss', 'content': 0.18549905717372894, 'timestamp': '2025-10-01 04:16:16.561159', 'step': 2939, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:16.591689', 'step': 2939, 'epoch': 1} {'type': 'loss', 'content': 0.24195683002471924, 'timestamp': '2025-10-01 04:16:16.615469', 'step': 2940, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:16.646245', 'step': 2940, 'epoch': 1} {'type': 'loss', 'content': 0.19616657495498657, 'timestamp': '2025-10-01 04:16:16.648361', 'step': 2941, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:16.680797', 'step': 2941, 'epoch': 1} {'type': 'loss', 'content': 0.11843114346265793, 'timestamp': '2025-10-01 04:16:16.683056', 'step': 2942, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:16.715683', 'step': 2942, 'epoch': 1} {'type': 'loss', 'content': 0.13649336993694305, 'timestamp': '2025-10-01 04:16:16.717860', 'step': 2943, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:16.748692', 'step': 2943, 'epoch': 1} {'type': 'loss', 'content': 0.08964478969573975, 'timestamp': '2025-10-01 04:16:16.772614', 'step': 2944, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:16.802556', 'step': 2944, 'epoch': 1} {'type': 'loss', 'content': 0.1403052657842636, 'timestamp': '2025-10-01 04:16:16.804638', 'step': 2945, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:16:16.835437', 'step': 2945, 'epoch': 1} {'type': 'loss', 'content': 0.14553441107273102, 'timestamp': '2025-10-01 04:16:16.838247', 'step': 2946, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:16:16.878585', 'step': 2946, 'epoch': 1} {'type': 'loss', 'content': 0.16598287224769592, 'timestamp': '2025-10-01 04:16:16.880931', 'step': 2947, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:16.915008', 'step': 2947, 'epoch': 1} {'type': 'loss', 'content': 0.12917838990688324, 'timestamp': '2025-10-01 04:16:16.938850', 'step': 2948, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:16.973673', 'step': 2948, 'epoch': 1} {'type': 'loss', 'content': 0.09365054965019226, 'timestamp': '2025-10-01 04:16:16.975778', 'step': 2949, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:17.008530', 'step': 2949, 'epoch': 1} {'type': 'loss', 'content': 0.12677668035030365, 'timestamp': '2025-10-01 04:16:17.010672', 'step': 2950, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:17.042279', 'step': 2950, 'epoch': 1} {'type': 'loss', 'content': 0.18803231418132782, 'timestamp': '2025-10-01 04:16:17.044927', 'step': 2951, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:16:17.075812', 'step': 2951, 'epoch': 1} {'type': 'loss', 'content': 0.22041600942611694, 'timestamp': '2025-10-01 04:16:17.099402', 'step': 2952, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:17.131871', 'step': 2952, 'epoch': 1} {'type': 'loss', 'content': 0.18209290504455566, 'timestamp': '2025-10-01 04:16:17.134288', 'step': 2953, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:17.165644', 'step': 2953, 'epoch': 1} {'type': 'loss', 'content': 0.15421512722969055, 'timestamp': '2025-10-01 04:16:17.167709', 'step': 2954, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:16:17.199903', 'step': 2954, 'epoch': 1} {'type': 'loss', 'content': 0.18646691739559174, 'timestamp': '2025-10-01 04:16:17.201875', 'step': 2955, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:17.234693', 'step': 2955, 'epoch': 1} {'type': 'loss', 'content': 0.11325094103813171, 'timestamp': '2025-10-01 04:16:17.258258', 'step': 2956, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:17.291173', 'step': 2956, 'epoch': 1} {'type': 'loss', 'content': 0.19126994907855988, 'timestamp': '2025-10-01 04:16:17.293450', 'step': 2957, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:16:17.330361', 'step': 2957, 'epoch': 1} {'type': 'loss', 'content': 0.2427918016910553, 'timestamp': '2025-10-01 04:16:17.334876', 'step': 2958, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:17.370374', 'step': 2958, 'epoch': 1} {'type': 'loss', 'content': 0.20072513818740845, 'timestamp': '2025-10-01 04:16:17.374268', 'step': 2959, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:17.404860', 'step': 2959, 'epoch': 1} {'type': 'loss', 'content': 0.18611587584018707, 'timestamp': '2025-10-01 04:16:17.428431', 'step': 2960, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:17.458462', 'step': 2960, 'epoch': 1} {'type': 'loss', 'content': 0.17848387360572815, 'timestamp': '2025-10-01 04:16:17.460477', 'step': 2961, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:17.492996', 'step': 2961, 'epoch': 1} {'type': 'loss', 'content': 0.18065960705280304, 'timestamp': '2025-10-01 04:16:17.495740', 'step': 2962, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:16:17.527244', 'step': 2962, 'epoch': 1} {'type': 'loss', 'content': 0.16044984757900238, 'timestamp': '2025-10-01 04:16:17.529304', 'step': 2963, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:17.567792', 'step': 2963, 'epoch': 1} {'type': 'loss', 'content': 0.1411897987127304, 'timestamp': '2025-10-01 04:16:17.591405', 'step': 2964, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:17.623324', 'step': 2964, 'epoch': 1} {'type': 'loss', 'content': 0.1769884079694748, 'timestamp': '2025-10-01 04:16:17.625584', 'step': 2965, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:16:17.655994', 'step': 2965, 'epoch': 1} {'type': 'loss', 'content': 0.21391066908836365, 'timestamp': '2025-10-01 04:16:17.658060', 'step': 2966, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:17.687735', 'step': 2966, 'epoch': 1} {'type': 'loss', 'content': 0.14800959825515747, 'timestamp': '2025-10-01 04:16:17.689573', 'step': 2967, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:17.720970', 'step': 2967, 'epoch': 1} {'type': 'loss', 'content': 0.11064320802688599, 'timestamp': '2025-10-01 04:16:17.744364', 'step': 2968, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:17.775693', 'step': 2968, 'epoch': 1} {'type': 'loss', 'content': 0.14551734924316406, 'timestamp': '2025-10-01 04:16:17.777759', 'step': 2969, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:17.808012', 'step': 2969, 'epoch': 1} {'type': 'loss', 'content': 0.1343720555305481, 'timestamp': '2025-10-01 04:16:17.810016', 'step': 2970, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:17.843155', 'step': 2970, 'epoch': 1} {'type': 'loss', 'content': 0.23137494921684265, 'timestamp': '2025-10-01 04:16:17.845065', 'step': 2971, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:17.875808', 'step': 2971, 'epoch': 1} {'type': 'loss', 'content': 0.17164355516433716, 'timestamp': '2025-10-01 04:16:17.899514', 'step': 2972, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:16:17.932250', 'step': 2972, 'epoch': 1} {'type': 'loss', 'content': 0.1531783491373062, 'timestamp': '2025-10-01 04:16:17.934325', 'step': 2973, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:16:17.965258', 'step': 2973, 'epoch': 1} {'type': 'loss', 'content': 0.13807512819766998, 'timestamp': '2025-10-01 04:16:17.968165', 'step': 2974, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:18.000201', 'step': 2974, 'epoch': 1} {'type': 'loss', 'content': 0.1619856208562851, 'timestamp': '2025-10-01 04:16:18.002500', 'step': 2975, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:16:18.032535', 'step': 2975, 'epoch': 1} {'type': 'loss', 'content': 0.12012337148189545, 'timestamp': '2025-10-01 04:16:18.056151', 'step': 2976, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-10-01 04:16:18.087860', 'step': 2976, 'epoch': 1} {'type': 'loss', 'content': 0.18580745160579681, 'timestamp': '2025-10-01 04:16:18.092522', 'step': 2977, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:18.124012', 'step': 2977, 'epoch': 1} {'type': 'loss', 'content': 0.12372595816850662, 'timestamp': '2025-10-01 04:16:18.126152', 'step': 2978, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:18.164419', 'step': 2978, 'epoch': 1} {'type': 'loss', 'content': 0.09210501611232758, 'timestamp': '2025-10-01 04:16:18.166576', 'step': 2979, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:16:18.198319', 'step': 2979, 'epoch': 1} {'type': 'loss', 'content': 0.20143920183181763, 'timestamp': '2025-10-01 04:16:18.223511', 'step': 2980, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:16:18.255742', 'step': 2980, 'epoch': 1} {'type': 'loss', 'content': 0.11255001276731491, 'timestamp': '2025-10-01 04:16:18.257889', 'step': 2981, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:16:18.293122', 'step': 2981, 'epoch': 1} {'type': 'loss', 'content': 0.18176840245723724, 'timestamp': '2025-10-01 04:16:18.295387', 'step': 2982, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:18.348200', 'step': 2982, 'epoch': 1} {'type': 'loss', 'content': 0.15489605069160461, 'timestamp': '2025-10-01 04:16:18.352537', 'step': 2983, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:16:18.403041', 'step': 2983, 'epoch': 1} {'type': 'loss', 'content': 0.25200578570365906, 'timestamp': '2025-10-01 04:16:18.427143', 'step': 2984, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:18.463944', 'step': 2984, 'epoch': 1} {'type': 'loss', 'content': 0.11828508973121643, 'timestamp': '2025-10-01 04:16:18.466100', 'step': 2985, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:16:18.499502', 'step': 2985, 'epoch': 1} {'type': 'loss', 'content': 0.17585502564907074, 'timestamp': '2025-10-01 04:16:18.502164', 'step': 2986, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:16:18.539233', 'step': 2986, 'epoch': 1} {'type': 'loss', 'content': 0.0877920538187027, 'timestamp': '2025-10-01 04:16:18.541572', 'step': 2987, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:16:18.573644', 'step': 2987, 'epoch': 1} {'type': 'loss', 'content': 0.1929386705160141, 'timestamp': '2025-10-01 04:16:18.599531', 'step': 2988, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:18.636935', 'step': 2988, 'epoch': 1} {'type': 'loss', 'content': 0.23249705135822296, 'timestamp': '2025-10-01 04:16:18.639377', 'step': 2989, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:16:18.677266', 'step': 2989, 'epoch': 1} {'type': 'loss', 'content': 0.17712032794952393, 'timestamp': '2025-10-01 04:16:18.679552', 'step': 2990, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:16:18.711815', 'step': 2990, 'epoch': 1} {'type': 'loss', 'content': 0.1360003799200058, 'timestamp': '2025-10-01 04:16:18.713844', 'step': 2991, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:18.750210', 'step': 2991, 'epoch': 1} {'type': 'loss', 'content': 0.2284628301858902, 'timestamp': '2025-10-01 04:16:18.774016', 'step': 2992, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:18.808867', 'step': 2992, 'epoch': 1} {'type': 'loss', 'content': 0.2795183062553406, 'timestamp': '2025-10-01 04:16:18.810890', 'step': 2993, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:18.852007', 'step': 2993, 'epoch': 1} {'type': 'loss', 'content': 0.2138959914445877, 'timestamp': '2025-10-01 04:16:18.854281', 'step': 2994, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:18.892648', 'step': 2994, 'epoch': 1} {'type': 'loss', 'content': 0.21992672979831696, 'timestamp': '2025-10-01 04:16:18.895030', 'step': 2995, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:18.939481', 'step': 2995, 'epoch': 1} {'type': 'loss', 'content': 0.16993655264377594, 'timestamp': '2025-10-01 04:16:18.963369', 'step': 2996, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:19.004691', 'step': 2996, 'epoch': 1} {'type': 'loss', 'content': 0.15227670967578888, 'timestamp': '2025-10-01 04:16:19.006991', 'step': 2997, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:16:19.048464', 'step': 2997, 'epoch': 1} {'type': 'loss', 'content': 0.15107490122318268, 'timestamp': '2025-10-01 04:16:19.050584', 'step': 2998, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:16:19.093625', 'step': 2998, 'epoch': 1} {'type': 'loss', 'content': 0.135331392288208, 'timestamp': '2025-10-01 04:16:19.102226', 'step': 2999, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:19.142134', 'step': 2999, 'epoch': 1} {'type': 'loss', 'content': 0.13675855100154877, 'timestamp': '2025-10-01 04:16:19.166070', 'step': 3000, 'epoch': 1} {'type': 'info', 'content': 'Checkpoint saved at step 3000', 'timestamp': '2025-10-01 04:16:24.064690', 'step': 3000, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:24.103804', 'step': 3000, 'epoch': 1} {'type': 'loss', 'content': 0.10492698103189468, 'timestamp': '2025-10-01 04:16:24.105892', 'step': 3001, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:24.138611', 'step': 3001, 'epoch': 1} {'type': 'loss', 'content': 0.16839557886123657, 'timestamp': '2025-10-01 04:16:24.140569', 'step': 3002, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:24.177707', 'step': 3002, 'epoch': 1} {'type': 'loss', 'content': 0.14869491755962372, 'timestamp': '2025-10-01 04:16:24.179616', 'step': 3003, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:24.221644', 'step': 3003, 'epoch': 1} {'type': 'loss', 'content': 0.1286327987909317, 'timestamp': '2025-10-01 04:16:24.245560', 'step': 3004, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:24.279688', 'step': 3004, 'epoch': 1} {'type': 'loss', 'content': 0.08930502831935883, 'timestamp': '2025-10-01 04:16:24.281812', 'step': 3005, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:24.316617', 'step': 3005, 'epoch': 1} {'type': 'loss', 'content': 0.16015128791332245, 'timestamp': '2025-10-01 04:16:24.318760', 'step': 3006, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:16:24.364202', 'step': 3006, 'epoch': 1} {'type': 'loss', 'content': 0.20563001930713654, 'timestamp': '2025-10-01 04:16:24.366500', 'step': 3007, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:24.402815', 'step': 3007, 'epoch': 1} {'type': 'loss', 'content': 0.0722687691450119, 'timestamp': '2025-10-01 04:16:24.426399', 'step': 3008, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:24.472500', 'step': 3008, 'epoch': 1} {'type': 'loss', 'content': 0.2026166170835495, 'timestamp': '2025-10-01 04:16:24.474580', 'step': 3009, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:16:24.527898', 'step': 3009, 'epoch': 1} {'type': 'loss', 'content': 0.26139041781425476, 'timestamp': '2025-10-01 04:16:24.530637', 'step': 3010, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:16:24.568413', 'step': 3010, 'epoch': 1} {'type': 'loss', 'content': 0.13810013234615326, 'timestamp': '2025-10-01 04:16:24.570408', 'step': 3011, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:24.643271', 'step': 3011, 'epoch': 1} {'type': 'loss', 'content': 0.07240314036607742, 'timestamp': '2025-10-01 04:16:24.675786', 'step': 3012, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:24.716589', 'step': 3012, 'epoch': 1} {'type': 'loss', 'content': 0.1510995775461197, 'timestamp': '2025-10-01 04:16:24.718405', 'step': 3013, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:24.766652', 'step': 3013, 'epoch': 1} {'type': 'loss', 'content': 0.11308474838733673, 'timestamp': '2025-10-01 04:16:24.769069', 'step': 3014, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:24.804485', 'step': 3014, 'epoch': 1} {'type': 'loss', 'content': 0.1357848048210144, 'timestamp': '2025-10-01 04:16:24.806732', 'step': 3015, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:24.848626', 'step': 3015, 'epoch': 1} {'type': 'loss', 'content': 0.2362777143716812, 'timestamp': '2025-10-01 04:16:24.871957', 'step': 3016, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:16:24.923166', 'step': 3016, 'epoch': 1} {'type': 'loss', 'content': 0.18820196390151978, 'timestamp': '2025-10-01 04:16:24.925532', 'step': 3017, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:16:24.974284', 'step': 3017, 'epoch': 1} {'type': 'loss', 'content': 0.21058478951454163, 'timestamp': '2025-10-01 04:16:24.976665', 'step': 3018, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:25.011440', 'step': 3018, 'epoch': 1} {'type': 'loss', 'content': 0.10322821885347366, 'timestamp': '2025-10-01 04:16:25.013460', 'step': 3019, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:16:25.060565', 'step': 3019, 'epoch': 1} {'type': 'loss', 'content': 0.14826437830924988, 'timestamp': '2025-10-01 04:16:25.084005', 'step': 3020, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:25.120198', 'step': 3020, 'epoch': 1} {'type': 'loss', 'content': 0.15328994393348694, 'timestamp': '2025-10-01 04:16:25.122057', 'step': 3021, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:16:25.182342', 'step': 3021, 'epoch': 1} {'type': 'loss', 'content': 0.13500168919563293, 'timestamp': '2025-10-01 04:16:25.184830', 'step': 3022, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:25.218690', 'step': 3022, 'epoch': 1} {'type': 'loss', 'content': 0.21119005978107452, 'timestamp': '2025-10-01 04:16:25.221121', 'step': 3023, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:25.284682', 'step': 3023, 'epoch': 1} {'type': 'loss', 'content': 0.21956107020378113, 'timestamp': '2025-10-01 04:16:25.308499', 'step': 3024, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:16:25.348626', 'step': 3024, 'epoch': 1} {'type': 'loss', 'content': 0.1835067719221115, 'timestamp': '2025-10-01 04:16:25.350538', 'step': 3025, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:25.386121', 'step': 3025, 'epoch': 1} {'type': 'loss', 'content': 0.09099652618169785, 'timestamp': '2025-10-01 04:16:25.393036', 'step': 3026, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:16:25.426873', 'step': 3026, 'epoch': 1} {'type': 'loss', 'content': 0.20437224209308624, 'timestamp': '2025-10-01 04:16:25.428791', 'step': 3027, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:16:25.478681', 'step': 3027, 'epoch': 1} {'type': 'loss', 'content': 0.2198304384946823, 'timestamp': '2025-10-01 04:16:25.502060', 'step': 3028, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:25.535796', 'step': 3028, 'epoch': 1} {'type': 'loss', 'content': 0.11145123094320297, 'timestamp': '2025-10-01 04:16:25.537761', 'step': 3029, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:25.573261', 'step': 3029, 'epoch': 1} {'type': 'loss', 'content': 0.19832713901996613, 'timestamp': '2025-10-01 04:16:25.575242', 'step': 3030, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:25.607809', 'step': 3030, 'epoch': 1} {'type': 'loss', 'content': 0.15419219434261322, 'timestamp': '2025-10-01 04:16:25.609887', 'step': 3031, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:25.646339', 'step': 3031, 'epoch': 1} {'type': 'loss', 'content': 0.1469224989414215, 'timestamp': '2025-10-01 04:16:25.670628', 'step': 3032, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:16:25.715289', 'step': 3032, 'epoch': 1} {'type': 'loss', 'content': 0.17147164046764374, 'timestamp': '2025-10-01 04:16:25.717305', 'step': 3033, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:25.756073', 'step': 3033, 'epoch': 1} {'type': 'loss', 'content': 0.17334716022014618, 'timestamp': '2025-10-01 04:16:25.758027', 'step': 3034, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:16:25.796371', 'step': 3034, 'epoch': 1} {'type': 'loss', 'content': 0.13904158771038055, 'timestamp': '2025-10-01 04:16:25.798869', 'step': 3035, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:25.841476', 'step': 3035, 'epoch': 1} {'type': 'loss', 'content': 0.1983814686536789, 'timestamp': '2025-10-01 04:16:25.865135', 'step': 3036, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:25.923940', 'step': 3036, 'epoch': 1} {'type': 'loss', 'content': 0.1417865753173828, 'timestamp': '2025-10-01 04:16:25.927271', 'step': 3037, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:25.959973', 'step': 3037, 'epoch': 1} {'type': 'loss', 'content': 0.25349506735801697, 'timestamp': '2025-10-01 04:16:25.962271', 'step': 3038, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:26.001852', 'step': 3038, 'epoch': 1} {'type': 'loss', 'content': 0.1244109719991684, 'timestamp': '2025-10-01 04:16:26.003829', 'step': 3039, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:16:26.036419', 'step': 3039, 'epoch': 1} {'type': 'loss', 'content': 0.23774759471416473, 'timestamp': '2025-10-01 04:16:26.060355', 'step': 3040, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:16:26.103334', 'step': 3040, 'epoch': 1} {'type': 'loss', 'content': 0.09588680416345596, 'timestamp': '2025-10-01 04:16:26.105598', 'step': 3041, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:26.141698', 'step': 3041, 'epoch': 1} {'type': 'loss', 'content': 0.14993731677532196, 'timestamp': '2025-10-01 04:16:26.144077', 'step': 3042, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:26.178543', 'step': 3042, 'epoch': 1} {'type': 'loss', 'content': 0.19929799437522888, 'timestamp': '2025-10-01 04:16:26.180769', 'step': 3043, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:26.213721', 'step': 3043, 'epoch': 1} {'type': 'loss', 'content': 0.20210354030132294, 'timestamp': '2025-10-01 04:16:26.237248', 'step': 3044, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:16:26.273751', 'step': 3044, 'epoch': 1} {'type': 'loss', 'content': 0.2606426179409027, 'timestamp': '2025-10-01 04:16:26.275646', 'step': 3045, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:26.308677', 'step': 3045, 'epoch': 1} {'type': 'loss', 'content': 0.11549009382724762, 'timestamp': '2025-10-01 04:16:26.311016', 'step': 3046, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:16:26.345345', 'step': 3046, 'epoch': 1} {'type': 'loss', 'content': 0.2520303428173065, 'timestamp': '2025-10-01 04:16:26.347594', 'step': 3047, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:16:26.391582', 'step': 3047, 'epoch': 1} {'type': 'loss', 'content': 0.12423478066921234, 'timestamp': '2025-10-01 04:16:26.415375', 'step': 3048, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:16:26.448021', 'step': 3048, 'epoch': 1} {'type': 'loss', 'content': 0.13734552264213562, 'timestamp': '2025-10-01 04:16:26.450043', 'step': 3049, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:26.483123', 'step': 3049, 'epoch': 1} {'type': 'loss', 'content': 0.10235516726970673, 'timestamp': '2025-10-01 04:16:26.485351', 'step': 3050, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:26.518476', 'step': 3050, 'epoch': 1} {'type': 'loss', 'content': 0.1756286472082138, 'timestamp': '2025-10-01 04:16:26.522036', 'step': 3051, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:26.558266', 'step': 3051, 'epoch': 1} {'type': 'loss', 'content': 0.17885762453079224, 'timestamp': '2025-10-01 04:16:26.582136', 'step': 3052, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:26.623797', 'step': 3052, 'epoch': 1} {'type': 'loss', 'content': 0.21997115015983582, 'timestamp': '2025-10-01 04:16:26.625932', 'step': 3053, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:16:26.680479', 'step': 3053, 'epoch': 1} {'type': 'loss', 'content': 0.19902533292770386, 'timestamp': '2025-10-01 04:16:26.682634', 'step': 3054, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:26.717792', 'step': 3054, 'epoch': 1} {'type': 'loss', 'content': 0.12559565901756287, 'timestamp': '2025-10-01 04:16:26.720260', 'step': 3055, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:26.753792', 'step': 3055, 'epoch': 1} {'type': 'loss', 'content': 0.19898968935012817, 'timestamp': '2025-10-01 04:16:26.777501', 'step': 3056, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:26.810650', 'step': 3056, 'epoch': 1} {'type': 'loss', 'content': 0.08871421217918396, 'timestamp': '2025-10-01 04:16:26.812626', 'step': 3057, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:26.846704', 'step': 3057, 'epoch': 1} {'type': 'loss', 'content': 0.29829591512680054, 'timestamp': '2025-10-01 04:16:26.848847', 'step': 3058, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:16:26.882450', 'step': 3058, 'epoch': 1} {'type': 'loss', 'content': 0.14374622702598572, 'timestamp': '2025-10-01 04:16:26.884821', 'step': 3059, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:26.917732', 'step': 3059, 'epoch': 1} {'type': 'loss', 'content': 0.2709408402442932, 'timestamp': '2025-10-01 04:16:26.941933', 'step': 3060, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:16:26.976156', 'step': 3060, 'epoch': 1} {'type': 'loss', 'content': 0.06215548515319824, 'timestamp': '2025-10-01 04:16:26.978177', 'step': 3061, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:27.016674', 'step': 3061, 'epoch': 1} {'type': 'loss', 'content': 0.12732046842575073, 'timestamp': '2025-10-01 04:16:27.020212', 'step': 3062, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:27.057583', 'step': 3062, 'epoch': 1} {'type': 'loss', 'content': 0.23670871555805206, 'timestamp': '2025-10-01 04:16:27.060178', 'step': 3063, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:27.106799', 'step': 3063, 'epoch': 1} {'type': 'loss', 'content': 0.1422901153564453, 'timestamp': '2025-10-01 04:16:27.132800', 'step': 3064, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:27.178820', 'step': 3064, 'epoch': 1} {'type': 'loss', 'content': 0.19289973378181458, 'timestamp': '2025-10-01 04:16:27.181052', 'step': 3065, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:27.226914', 'step': 3065, 'epoch': 1} {'type': 'loss', 'content': 0.10435310751199722, 'timestamp': '2025-10-01 04:16:27.229059', 'step': 3066, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:16:27.262915', 'step': 3066, 'epoch': 1} {'type': 'loss', 'content': 0.1670176386833191, 'timestamp': '2025-10-01 04:16:27.265274', 'step': 3067, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:27.312155', 'step': 3067, 'epoch': 1} {'type': 'loss', 'content': 0.1285695880651474, 'timestamp': '2025-10-01 04:16:27.335845', 'step': 3068, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:27.373594', 'step': 3068, 'epoch': 1} {'type': 'loss', 'content': 0.25003769993782043, 'timestamp': '2025-10-01 04:16:27.378651', 'step': 3069, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:27.435128', 'step': 3069, 'epoch': 1} {'type': 'loss', 'content': 0.22206783294677734, 'timestamp': '2025-10-01 04:16:27.437074', 'step': 3070, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:27.483494', 'step': 3070, 'epoch': 1} {'type': 'loss', 'content': 0.2198861986398697, 'timestamp': '2025-10-01 04:16:27.487895', 'step': 3071, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:27.522435', 'step': 3071, 'epoch': 1} {'type': 'loss', 'content': 0.22494885325431824, 'timestamp': '2025-10-01 04:16:27.546167', 'step': 3072, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:27.583675', 'step': 3072, 'epoch': 1} {'type': 'loss', 'content': 0.21837587654590607, 'timestamp': '2025-10-01 04:16:27.585783', 'step': 3073, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:27.619574', 'step': 3073, 'epoch': 1} {'type': 'loss', 'content': 0.11515012383460999, 'timestamp': '2025-10-01 04:16:27.621565', 'step': 3074, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:16:27.655839', 'step': 3074, 'epoch': 1} {'type': 'loss', 'content': 0.14011426270008087, 'timestamp': '2025-10-01 04:16:27.658301', 'step': 3075, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:27.692221', 'step': 3075, 'epoch': 1} {'type': 'loss', 'content': 0.10788638144731522, 'timestamp': '2025-10-01 04:16:27.722601', 'step': 3076, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:16:27.755794', 'step': 3076, 'epoch': 1} {'type': 'loss', 'content': 0.1594785451889038, 'timestamp': '2025-10-01 04:16:27.757765', 'step': 3077, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:27.791376', 'step': 3077, 'epoch': 1} {'type': 'loss', 'content': 0.15782183408737183, 'timestamp': '2025-10-01 04:16:27.794511', 'step': 3078, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:27.831692', 'step': 3078, 'epoch': 1} {'type': 'loss', 'content': 0.12153927236795425, 'timestamp': '2025-10-01 04:16:27.833935', 'step': 3079, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:27.872880', 'step': 3079, 'epoch': 1} {'type': 'loss', 'content': 0.2621496617794037, 'timestamp': '2025-10-01 04:16:27.896563', 'step': 3080, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:27.930714', 'step': 3080, 'epoch': 1} {'type': 'loss', 'content': 0.24190108478069305, 'timestamp': '2025-10-01 04:16:27.933292', 'step': 3081, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:27.975586', 'step': 3081, 'epoch': 1} {'type': 'loss', 'content': 0.21533481776714325, 'timestamp': '2025-10-01 04:16:27.977657', 'step': 3082, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:28.024381', 'step': 3082, 'epoch': 1} {'type': 'loss', 'content': 0.17360764741897583, 'timestamp': '2025-10-01 04:16:28.028471', 'step': 3083, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:28.063910', 'step': 3083, 'epoch': 1} {'type': 'loss', 'content': 0.15322881937026978, 'timestamp': '2025-10-01 04:16:28.087612', 'step': 3084, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:16:28.132463', 'step': 3084, 'epoch': 1} {'type': 'loss', 'content': 0.07542505115270615, 'timestamp': '2025-10-01 04:16:28.137321', 'step': 3085, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:16:28.175495', 'step': 3085, 'epoch': 1} {'type': 'loss', 'content': 0.2060595005750656, 'timestamp': '2025-10-01 04:16:28.177900', 'step': 3086, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:28.223182', 'step': 3086, 'epoch': 1} {'type': 'loss', 'content': 0.156333789229393, 'timestamp': '2025-10-01 04:16:28.225158', 'step': 3087, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:16:28.259089', 'step': 3087, 'epoch': 1} {'type': 'loss', 'content': 0.21967312693595886, 'timestamp': '2025-10-01 04:16:28.282664', 'step': 3088, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:28.318282', 'step': 3088, 'epoch': 1} {'type': 'loss', 'content': 0.27571597695350647, 'timestamp': '2025-10-01 04:16:28.320238', 'step': 3089, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:16:28.366211', 'step': 3089, 'epoch': 1} {'type': 'loss', 'content': 0.10932718217372894, 'timestamp': '2025-10-01 04:16:28.368166', 'step': 3090, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:28.412746', 'step': 3090, 'epoch': 1} {'type': 'loss', 'content': 0.20525534451007843, 'timestamp': '2025-10-01 04:16:28.414876', 'step': 3091, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:16:28.455987', 'step': 3091, 'epoch': 1} {'type': 'loss', 'content': 0.1618671715259552, 'timestamp': '2025-10-01 04:16:28.480185', 'step': 3092, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:28.513799', 'step': 3092, 'epoch': 1} {'type': 'loss', 'content': 0.20392483472824097, 'timestamp': '2025-10-01 04:16:28.515733', 'step': 3093, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:28.547067', 'step': 3093, 'epoch': 1} {'type': 'loss', 'content': 0.1097012460231781, 'timestamp': '2025-10-01 04:16:28.552912', 'step': 3094, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:28.585522', 'step': 3094, 'epoch': 1} {'type': 'loss', 'content': 0.1272892951965332, 'timestamp': '2025-10-01 04:16:28.588331', 'step': 3095, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:28.630247', 'step': 3095, 'epoch': 1} {'type': 'loss', 'content': 0.1667330414056778, 'timestamp': '2025-10-01 04:16:28.664720', 'step': 3096, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:28.706889', 'step': 3096, 'epoch': 1} {'type': 'loss', 'content': 0.09421208500862122, 'timestamp': '2025-10-01 04:16:28.713895', 'step': 3097, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:28.756294', 'step': 3097, 'epoch': 1} {'type': 'loss', 'content': 0.2178158164024353, 'timestamp': '2025-10-01 04:16:28.758241', 'step': 3098, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:28.803855', 'step': 3098, 'epoch': 1} {'type': 'loss', 'content': 0.15623798966407776, 'timestamp': '2025-10-01 04:16:28.805787', 'step': 3099, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:16:28.837740', 'step': 3099, 'epoch': 1} {'type': 'loss', 'content': 0.1993459165096283, 'timestamp': '2025-10-01 04:16:28.861261', 'step': 3100, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:16:28.898527', 'step': 3100, 'epoch': 1} {'type': 'loss', 'content': 0.2175452560186386, 'timestamp': '2025-10-01 04:16:28.900351', 'step': 3101, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:28.933967', 'step': 3101, 'epoch': 1} {'type': 'loss', 'content': 0.10472581535577774, 'timestamp': '2025-10-01 04:16:28.935751', 'step': 3102, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:28.968394', 'step': 3102, 'epoch': 1} {'type': 'loss', 'content': 0.20945952832698822, 'timestamp': '2025-10-01 04:16:28.970071', 'step': 3103, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:29.008273', 'step': 3103, 'epoch': 1} {'type': 'loss', 'content': 0.10031862556934357, 'timestamp': '2025-10-01 04:16:29.033496', 'step': 3104, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:16:29.072749', 'step': 3104, 'epoch': 1} {'type': 'loss', 'content': 0.21715672314167023, 'timestamp': '2025-10-01 04:16:29.074921', 'step': 3105, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:16:29.108131', 'step': 3105, 'epoch': 1} {'type': 'loss', 'content': 0.16699270904064178, 'timestamp': '2025-10-01 04:16:29.110690', 'step': 3106, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:16:29.157834', 'step': 3106, 'epoch': 1} {'type': 'loss', 'content': 0.1227458119392395, 'timestamp': '2025-10-01 04:16:29.159541', 'step': 3107, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:29.193614', 'step': 3107, 'epoch': 1} {'type': 'loss', 'content': 0.11766202002763748, 'timestamp': '2025-10-01 04:16:29.217027', 'step': 3108, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:29.266112', 'step': 3108, 'epoch': 1} {'type': 'loss', 'content': 0.1443295180797577, 'timestamp': '2025-10-01 04:16:29.268023', 'step': 3109, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:29.311550', 'step': 3109, 'epoch': 1} {'type': 'loss', 'content': 0.14786545932292938, 'timestamp': '2025-10-01 04:16:29.313552', 'step': 3110, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:29.361469', 'step': 3110, 'epoch': 1} {'type': 'loss', 'content': 0.12343626469373703, 'timestamp': '2025-10-01 04:16:29.363453', 'step': 3111, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:29.402784', 'step': 3111, 'epoch': 1} {'type': 'loss', 'content': 0.20468154549598694, 'timestamp': '2025-10-01 04:16:29.426259', 'step': 3112, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:29.465924', 'step': 3112, 'epoch': 1} {'type': 'loss', 'content': 0.12354167550802231, 'timestamp': '2025-10-01 04:16:29.467870', 'step': 3113, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:16:29.507276', 'step': 3113, 'epoch': 1} {'type': 'loss', 'content': 0.1732112467288971, 'timestamp': '2025-10-01 04:16:29.509285', 'step': 3114, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:16:29.550434', 'step': 3114, 'epoch': 1} {'type': 'loss', 'content': 0.14376989006996155, 'timestamp': '2025-10-01 04:16:29.552196', 'step': 3115, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:16:29.587048', 'step': 3115, 'epoch': 1} {'type': 'loss', 'content': 0.30213478207588196, 'timestamp': '2025-10-01 04:16:29.612752', 'step': 3116, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:16:29.650355', 'step': 3116, 'epoch': 1} {'type': 'loss', 'content': 0.17940470576286316, 'timestamp': '2025-10-01 04:16:29.652068', 'step': 3117, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:29.696119', 'step': 3117, 'epoch': 1} {'type': 'loss', 'content': 0.12991991639137268, 'timestamp': '2025-10-01 04:16:29.699654', 'step': 3118, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:16:29.739724', 'step': 3118, 'epoch': 1} {'type': 'loss', 'content': 0.15322789549827576, 'timestamp': '2025-10-01 04:16:29.741813', 'step': 3119, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:29.773334', 'step': 3119, 'epoch': 1} {'type': 'loss', 'content': 0.11371281743049622, 'timestamp': '2025-10-01 04:16:29.796954', 'step': 3120, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:29.831982', 'step': 3120, 'epoch': 1} {'type': 'loss', 'content': 0.13208508491516113, 'timestamp': '2025-10-01 04:16:29.833855', 'step': 3121, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:16:29.864415', 'step': 3121, 'epoch': 1} {'type': 'loss', 'content': 0.18844757974147797, 'timestamp': '2025-10-01 04:16:29.866784', 'step': 3122, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:16:29.897868', 'step': 3122, 'epoch': 1} {'type': 'loss', 'content': 0.11345919966697693, 'timestamp': '2025-10-01 04:16:29.899735', 'step': 3123, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:16:29.930151', 'step': 3123, 'epoch': 1} {'type': 'loss', 'content': 0.1342848688364029, 'timestamp': '2025-10-01 04:16:29.959181', 'step': 3124, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:29.989804', 'step': 3124, 'epoch': 1} {'type': 'loss', 'content': 0.1363251805305481, 'timestamp': '2025-10-01 04:16:29.991600', 'step': 3125, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:30.036690', 'step': 3125, 'epoch': 1} {'type': 'loss', 'content': 0.17746573686599731, 'timestamp': '2025-10-01 04:16:30.038481', 'step': 3126, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:16:30.078682', 'step': 3126, 'epoch': 1} {'type': 'loss', 'content': 0.16635718941688538, 'timestamp': '2025-10-01 04:16:30.081481', 'step': 3127, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:16:30.117736', 'step': 3127, 'epoch': 1} {'type': 'loss', 'content': 0.12813787162303925, 'timestamp': '2025-10-01 04:16:30.143022', 'step': 3128, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:16:30.173853', 'step': 3128, 'epoch': 1} {'type': 'loss', 'content': 0.14539024233818054, 'timestamp': '2025-10-01 04:16:30.175680', 'step': 3129, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:16:30.217104', 'step': 3129, 'epoch': 1} {'type': 'loss', 'content': 0.26666152477264404, 'timestamp': '2025-10-01 04:16:30.222372', 'step': 3130, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:16:30.257946', 'step': 3130, 'epoch': 1} {'type': 'loss', 'content': 0.147083580493927, 'timestamp': '2025-10-01 04:16:30.259740', 'step': 3131, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:30.292271', 'step': 3131, 'epoch': 1} {'type': 'loss', 'content': 0.1286647915840149, 'timestamp': '2025-10-01 04:16:30.316641', 'step': 3132, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:16:30.355424', 'step': 3132, 'epoch': 1} {'type': 'loss', 'content': 0.2099912017583847, 'timestamp': '2025-10-01 04:16:30.359640', 'step': 3133, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:30.393442', 'step': 3133, 'epoch': 1} {'type': 'loss', 'content': 0.14121577143669128, 'timestamp': '2025-10-01 04:16:30.395234', 'step': 3134, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:16:30.427374', 'step': 3134, 'epoch': 1} {'type': 'loss', 'content': 0.19488149881362915, 'timestamp': '2025-10-01 04:16:30.432716', 'step': 3135, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:16:30.480986', 'step': 3135, 'epoch': 1} {'type': 'loss', 'content': 0.1873316466808319, 'timestamp': '2025-10-01 04:16:30.504686', 'step': 3136, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:16:30.539853', 'step': 3136, 'epoch': 1} {'type': 'loss', 'content': 0.15814544260501862, 'timestamp': '2025-10-01 04:16:30.553330', 'step': 3137, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:30.587035', 'step': 3137, 'epoch': 1} {'type': 'loss', 'content': 0.11345987021923065, 'timestamp': '2025-10-01 04:16:30.589259', 'step': 3138, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:16:30.623220', 'step': 3138, 'epoch': 1} {'type': 'loss', 'content': 0.12740115821361542, 'timestamp': '2025-10-01 04:16:30.627179', 'step': 3139, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:30.661581', 'step': 3139, 'epoch': 1} {'type': 'loss', 'content': 0.21321457624435425, 'timestamp': '2025-10-01 04:16:30.692897', 'step': 3140, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-10-01 04:16:30.726277', 'step': 3140, 'epoch': 1} {'type': 'loss', 'content': 0.22614118456840515, 'timestamp': '2025-10-01 04:16:30.728618', 'step': 3141, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:30.765154', 'step': 3141, 'epoch': 1} {'type': 'loss', 'content': 0.14461836218833923, 'timestamp': '2025-10-01 04:16:30.767167', 'step': 3142, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:16:30.802575', 'step': 3142, 'epoch': 1} {'type': 'loss', 'content': 0.29769864678382874, 'timestamp': '2025-10-01 04:16:30.804471', 'step': 3143, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:30.844804', 'step': 3143, 'epoch': 1} {'type': 'loss', 'content': 0.19363069534301758, 'timestamp': '2025-10-01 04:16:30.868212', 'step': 3144, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:16:30.902675', 'step': 3144, 'epoch': 1} {'type': 'loss', 'content': 0.16092292964458466, 'timestamp': '2025-10-01 04:16:30.909194', 'step': 3145, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:30.953350', 'step': 3145, 'epoch': 1} {'type': 'loss', 'content': 0.22863493859767914, 'timestamp': '2025-10-01 04:16:30.955373', 'step': 3146, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:30.991910', 'step': 3146, 'epoch': 1} {'type': 'loss', 'content': 0.17137229442596436, 'timestamp': '2025-10-01 04:16:30.994431', 'step': 3147, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:31.027325', 'step': 3147, 'epoch': 1} {'type': 'loss', 'content': 0.2634008228778839, 'timestamp': '2025-10-01 04:16:31.051810', 'step': 3148, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:31.086098', 'step': 3148, 'epoch': 1} {'type': 'loss', 'content': 0.2043209969997406, 'timestamp': '2025-10-01 04:16:31.097269', 'step': 3149, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:31.133997', 'step': 3149, 'epoch': 1} {'type': 'loss', 'content': 0.18964014947414398, 'timestamp': '2025-10-01 04:16:31.135947', 'step': 3150, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:31.173284', 'step': 3150, 'epoch': 1} {'type': 'loss', 'content': 0.2752377986907959, 'timestamp': '2025-10-01 04:16:31.175123', 'step': 3151, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:31.209809', 'step': 3151, 'epoch': 1} {'type': 'loss', 'content': 0.13790521025657654, 'timestamp': '2025-10-01 04:16:31.234539', 'step': 3152, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:31.276386', 'step': 3152, 'epoch': 1} {'type': 'loss', 'content': 0.1772802621126175, 'timestamp': '2025-10-01 04:16:31.278418', 'step': 3153, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:31.316942', 'step': 3153, 'epoch': 1} {'type': 'loss', 'content': 0.12462729215621948, 'timestamp': '2025-10-01 04:16:31.319034', 'step': 3154, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:31.352697', 'step': 3154, 'epoch': 1} {'type': 'loss', 'content': 0.1989452838897705, 'timestamp': '2025-10-01 04:16:31.354860', 'step': 3155, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:31.387253', 'step': 3155, 'epoch': 1} {'type': 'loss', 'content': 0.1864250898361206, 'timestamp': '2025-10-01 04:16:31.410667', 'step': 3156, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:16:31.443624', 'step': 3156, 'epoch': 1} {'type': 'loss', 'content': 0.20971013605594635, 'timestamp': '2025-10-01 04:16:31.446095', 'step': 3157, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:31.477499', 'step': 3157, 'epoch': 1} {'type': 'loss', 'content': 0.2132139801979065, 'timestamp': '2025-10-01 04:16:31.479280', 'step': 3158, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:31.510316', 'step': 3158, 'epoch': 1} {'type': 'loss', 'content': 0.13154096901416779, 'timestamp': '2025-10-01 04:16:31.512354', 'step': 3159, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:31.549429', 'step': 3159, 'epoch': 1} {'type': 'loss', 'content': 0.19642286002635956, 'timestamp': '2025-10-01 04:16:31.573419', 'step': 3160, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:16:31.606371', 'step': 3160, 'epoch': 1} {'type': 'loss', 'content': 0.1738988310098648, 'timestamp': '2025-10-01 04:16:31.608344', 'step': 3161, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:31.646325', 'step': 3161, 'epoch': 1} {'type': 'loss', 'content': 0.1408202201128006, 'timestamp': '2025-10-01 04:16:31.648317', 'step': 3162, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:31.687803', 'step': 3162, 'epoch': 1} {'type': 'loss', 'content': 0.26714026927948, 'timestamp': '2025-10-01 04:16:31.689781', 'step': 3163, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:16:31.722625', 'step': 3163, 'epoch': 1} {'type': 'loss', 'content': 0.22792835533618927, 'timestamp': '2025-10-01 04:16:31.752370', 'step': 3164, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:16:31.789098', 'step': 3164, 'epoch': 1} {'type': 'loss', 'content': 0.29034820199012756, 'timestamp': '2025-10-01 04:16:31.791936', 'step': 3165, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:31.834058', 'step': 3165, 'epoch': 1} {'type': 'loss', 'content': 0.19778142869472504, 'timestamp': '2025-10-01 04:16:31.836391', 'step': 3166, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:31.870365', 'step': 3166, 'epoch': 1} {'type': 'loss', 'content': 0.21958114206790924, 'timestamp': '2025-10-01 04:16:31.874181', 'step': 3167, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:16:31.919114', 'step': 3167, 'epoch': 1} {'type': 'loss', 'content': 0.2195032387971878, 'timestamp': '2025-10-01 04:16:31.944918', 'step': 3168, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:16:31.979340', 'step': 3168, 'epoch': 1} {'type': 'loss', 'content': 0.09002043306827545, 'timestamp': '2025-10-01 04:16:31.981590', 'step': 3169, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:32.014851', 'step': 3169, 'epoch': 1} {'type': 'loss', 'content': 0.11741934716701508, 'timestamp': '2025-10-01 04:16:32.016748', 'step': 3170, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:32.060504', 'step': 3170, 'epoch': 1} {'type': 'loss', 'content': 0.1669570803642273, 'timestamp': '2025-10-01 04:16:32.062736', 'step': 3171, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:16:32.097798', 'step': 3171, 'epoch': 1} {'type': 'loss', 'content': 0.20766179263591766, 'timestamp': '2025-10-01 04:16:32.121214', 'step': 3172, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:32.162141', 'step': 3172, 'epoch': 1} {'type': 'loss', 'content': 0.17019927501678467, 'timestamp': '2025-10-01 04:16:32.164105', 'step': 3173, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:32.198714', 'step': 3173, 'epoch': 1} {'type': 'loss', 'content': 0.15819281339645386, 'timestamp': '2025-10-01 04:16:32.200767', 'step': 3174, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:16:32.232600', 'step': 3174, 'epoch': 1} {'type': 'loss', 'content': 0.1710846722126007, 'timestamp': '2025-10-01 04:16:32.235540', 'step': 3175, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:32.269182', 'step': 3175, 'epoch': 1} {'type': 'loss', 'content': 0.22945763170719147, 'timestamp': '2025-10-01 04:16:32.292719', 'step': 3176, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:32.329161', 'step': 3176, 'epoch': 1} {'type': 'loss', 'content': 0.18507356941699982, 'timestamp': '2025-10-01 04:16:32.331171', 'step': 3177, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:32.364495', 'step': 3177, 'epoch': 1} {'type': 'loss', 'content': 0.22670279443264008, 'timestamp': '2025-10-01 04:16:32.366502', 'step': 3178, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:32.410750', 'step': 3178, 'epoch': 1} {'type': 'loss', 'content': 0.181118905544281, 'timestamp': '2025-10-01 04:16:32.413573', 'step': 3179, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:32.451098', 'step': 3179, 'epoch': 1} {'type': 'loss', 'content': 0.13849513232707977, 'timestamp': '2025-10-01 04:16:32.474486', 'step': 3180, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:16:32.513865', 'step': 3180, 'epoch': 1} {'type': 'loss', 'content': 0.15525726974010468, 'timestamp': '2025-10-01 04:16:32.515773', 'step': 3181, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:32.546342', 'step': 3181, 'epoch': 1} {'type': 'loss', 'content': 0.1313304603099823, 'timestamp': '2025-10-01 04:16:32.548242', 'step': 3182, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:32.598467', 'step': 3182, 'epoch': 1} {'type': 'loss', 'content': 0.15507322549819946, 'timestamp': '2025-10-01 04:16:32.601177', 'step': 3183, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:32.652223', 'step': 3183, 'epoch': 1} {'type': 'loss', 'content': 0.16859908401966095, 'timestamp': '2025-10-01 04:16:32.676407', 'step': 3184, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:16:32.726197', 'step': 3184, 'epoch': 1} {'type': 'loss', 'content': 0.2605822682380676, 'timestamp': '2025-10-01 04:16:32.728029', 'step': 3185, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:32.778513', 'step': 3185, 'epoch': 1} {'type': 'loss', 'content': 0.19835180044174194, 'timestamp': '2025-10-01 04:16:32.781297', 'step': 3186, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:32.829087', 'step': 3186, 'epoch': 1} {'type': 'loss', 'content': 0.1657540649175644, 'timestamp': '2025-10-01 04:16:32.830641', 'step': 3187, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:16:32.866920', 'step': 3187, 'epoch': 1} {'type': 'loss', 'content': 0.22434662282466888, 'timestamp': '2025-10-01 04:16:32.896513', 'step': 3188, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:32.938441', 'step': 3188, 'epoch': 1} {'type': 'loss', 'content': 0.14361532032489777, 'timestamp': '2025-10-01 04:16:32.941212', 'step': 3189, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:32.982397', 'step': 3189, 'epoch': 1} {'type': 'loss', 'content': 0.1245877742767334, 'timestamp': '2025-10-01 04:16:32.984638', 'step': 3190, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:33.018845', 'step': 3190, 'epoch': 1} {'type': 'loss', 'content': 0.1399306207895279, 'timestamp': '2025-10-01 04:16:33.032019', 'step': 3191, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:33.067127', 'step': 3191, 'epoch': 1} {'type': 'loss', 'content': 0.12685851752758026, 'timestamp': '2025-10-01 04:16:33.095873', 'step': 3192, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:16:33.137934', 'step': 3192, 'epoch': 1} {'type': 'loss', 'content': 0.13221894204616547, 'timestamp': '2025-10-01 04:16:33.139750', 'step': 3193, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:16:33.172381', 'step': 3193, 'epoch': 1} {'type': 'loss', 'content': 0.18507717549800873, 'timestamp': '2025-10-01 04:16:33.174453', 'step': 3194, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:33.206417', 'step': 3194, 'epoch': 1} {'type': 'loss', 'content': 0.1518833041191101, 'timestamp': '2025-10-01 04:16:33.208272', 'step': 3195, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:33.242058', 'step': 3195, 'epoch': 1} {'type': 'loss', 'content': 0.11094659566879272, 'timestamp': '2025-10-01 04:16:33.265435', 'step': 3196, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:33.300532', 'step': 3196, 'epoch': 1} {'type': 'loss', 'content': 0.08635913580656052, 'timestamp': '2025-10-01 04:16:33.302544', 'step': 3197, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:16:33.339925', 'step': 3197, 'epoch': 1} {'type': 'loss', 'content': 0.10277887433767319, 'timestamp': '2025-10-01 04:16:33.342994', 'step': 3198, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:33.377550', 'step': 3198, 'epoch': 1} {'type': 'loss', 'content': 0.21712633967399597, 'timestamp': '2025-10-01 04:16:33.379525', 'step': 3199, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:33.411979', 'step': 3199, 'epoch': 1} {'type': 'loss', 'content': 0.11591696739196777, 'timestamp': '2025-10-01 04:16:33.435378', 'step': 3200, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:33.468212', 'step': 3200, 'epoch': 1} {'type': 'loss', 'content': 0.1846303790807724, 'timestamp': '2025-10-01 04:16:33.470890', 'step': 3201, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:33.502465', 'step': 3201, 'epoch': 1} {'type': 'loss', 'content': 0.2629401385784149, 'timestamp': '2025-10-01 04:16:33.506189', 'step': 3202, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:33.549351', 'step': 3202, 'epoch': 1} {'type': 'loss', 'content': 0.18611563742160797, 'timestamp': '2025-10-01 04:16:33.551474', 'step': 3203, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:33.592331', 'step': 3203, 'epoch': 1} {'type': 'loss', 'content': 0.20559099316596985, 'timestamp': '2025-10-01 04:16:33.615773', 'step': 3204, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:33.665714', 'step': 3204, 'epoch': 1} {'type': 'loss', 'content': 0.19791308045387268, 'timestamp': '2025-10-01 04:16:33.668899', 'step': 3205, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:33.700542', 'step': 3205, 'epoch': 1} {'type': 'loss', 'content': 0.15408578515052795, 'timestamp': '2025-10-01 04:16:33.703870', 'step': 3206, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:16:33.736407', 'step': 3206, 'epoch': 1} {'type': 'loss', 'content': 0.1242835521697998, 'timestamp': '2025-10-01 04:16:33.739238', 'step': 3207, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:16:33.783066', 'step': 3207, 'epoch': 1} {'type': 'loss', 'content': 0.14345616102218628, 'timestamp': '2025-10-01 04:16:33.807602', 'step': 3208, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:33.848902', 'step': 3208, 'epoch': 1} {'type': 'loss', 'content': 0.09059130400419235, 'timestamp': '2025-10-01 04:16:33.850653', 'step': 3209, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:33.881551', 'step': 3209, 'epoch': 1} {'type': 'loss', 'content': 0.2276206612586975, 'timestamp': '2025-10-01 04:16:33.889455', 'step': 3210, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:33.926853', 'step': 3210, 'epoch': 1} {'type': 'loss', 'content': 0.18679724633693695, 'timestamp': '2025-10-01 04:16:33.934847', 'step': 3211, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:16:33.970974', 'step': 3211, 'epoch': 1} {'type': 'loss', 'content': 0.12675705552101135, 'timestamp': '2025-10-01 04:16:33.994810', 'step': 3212, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:34.030392', 'step': 3212, 'epoch': 1} {'type': 'loss', 'content': 0.1414703130722046, 'timestamp': '2025-10-01 04:16:34.032335', 'step': 3213, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:34.076947', 'step': 3213, 'epoch': 1} {'type': 'loss', 'content': 0.1529829502105713, 'timestamp': '2025-10-01 04:16:34.078865', 'step': 3214, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:34.110399', 'step': 3214, 'epoch': 1} {'type': 'loss', 'content': 0.17687980830669403, 'timestamp': '2025-10-01 04:16:34.112381', 'step': 3215, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:34.144322', 'step': 3215, 'epoch': 1} {'type': 'loss', 'content': 0.13756965100765228, 'timestamp': '2025-10-01 04:16:34.168104', 'step': 3216, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:34.199695', 'step': 3216, 'epoch': 1} {'type': 'loss', 'content': 0.22791217267513275, 'timestamp': '2025-10-01 04:16:34.201744', 'step': 3217, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:16:34.233293', 'step': 3217, 'epoch': 1} {'type': 'loss', 'content': 0.14278779923915863, 'timestamp': '2025-10-01 04:16:34.235695', 'step': 3218, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:34.270059', 'step': 3218, 'epoch': 1} {'type': 'loss', 'content': 0.15392477810382843, 'timestamp': '2025-10-01 04:16:34.272013', 'step': 3219, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:16:34.311416', 'step': 3219, 'epoch': 1} {'type': 'loss', 'content': 0.12751999497413635, 'timestamp': '2025-10-01 04:16:34.334807', 'step': 3220, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:16:34.374972', 'step': 3220, 'epoch': 1} {'type': 'loss', 'content': 0.20893873274326324, 'timestamp': '2025-10-01 04:16:34.377272', 'step': 3221, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:34.411531', 'step': 3221, 'epoch': 1} {'type': 'loss', 'content': 0.16064319014549255, 'timestamp': '2025-10-01 04:16:34.413367', 'step': 3222, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:16:34.452478', 'step': 3222, 'epoch': 1} {'type': 'loss', 'content': 0.24798759818077087, 'timestamp': '2025-10-01 04:16:34.455349', 'step': 3223, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-10-01 04:16:34.493148', 'step': 3223, 'epoch': 1} {'type': 'loss', 'content': 0.14436951279640198, 'timestamp': '2025-10-01 04:16:34.518714', 'step': 3224, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:16:34.550162', 'step': 3224, 'epoch': 1} {'type': 'loss', 'content': 0.16811569035053253, 'timestamp': '2025-10-01 04:16:34.552152', 'step': 3225, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:34.585344', 'step': 3225, 'epoch': 1} {'type': 'loss', 'content': 0.12791219353675842, 'timestamp': '2025-10-01 04:16:34.587423', 'step': 3226, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:34.621283', 'step': 3226, 'epoch': 1} {'type': 'loss', 'content': 0.1964949369430542, 'timestamp': '2025-10-01 04:16:34.623517', 'step': 3227, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:34.654622', 'step': 3227, 'epoch': 1} {'type': 'loss', 'content': 0.2908632457256317, 'timestamp': '2025-10-01 04:16:34.677785', 'step': 3228, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:16:34.707862', 'step': 3228, 'epoch': 1} {'type': 'loss', 'content': 0.10888180881738663, 'timestamp': '2025-10-01 04:16:34.709915', 'step': 3229, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:34.746424', 'step': 3229, 'epoch': 1} {'type': 'loss', 'content': 0.17073401808738708, 'timestamp': '2025-10-01 04:16:34.748362', 'step': 3230, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:16:34.782210', 'step': 3230, 'epoch': 1} {'type': 'loss', 'content': 0.13611432909965515, 'timestamp': '2025-10-01 04:16:34.786429', 'step': 3231, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:16:34.818878', 'step': 3231, 'epoch': 1} {'type': 'loss', 'content': 0.21818707883358002, 'timestamp': '2025-10-01 04:16:34.842508', 'step': 3232, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:16:34.877355', 'step': 3232, 'epoch': 1} {'type': 'loss', 'content': 0.11679723858833313, 'timestamp': '2025-10-01 04:16:34.879222', 'step': 3233, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:34.914581', 'step': 3233, 'epoch': 1} {'type': 'loss', 'content': 0.10238583385944366, 'timestamp': '2025-10-01 04:16:34.916631', 'step': 3234, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:16:34.952585', 'step': 3234, 'epoch': 1} {'type': 'loss', 'content': 0.20706118643283844, 'timestamp': '2025-10-01 04:16:34.954774', 'step': 3235, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:34.997477', 'step': 3235, 'epoch': 1} {'type': 'loss', 'content': 0.19492356479167938, 'timestamp': '2025-10-01 04:16:35.021065', 'step': 3236, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:16:35.054234', 'step': 3236, 'epoch': 1} {'type': 'loss', 'content': 0.21757954359054565, 'timestamp': '2025-10-01 04:16:35.056678', 'step': 3237, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:35.089681', 'step': 3237, 'epoch': 1} {'type': 'loss', 'content': 0.18576660752296448, 'timestamp': '2025-10-01 04:16:35.091857', 'step': 3238, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:16:35.126501', 'step': 3238, 'epoch': 1} {'type': 'loss', 'content': 0.12296329438686371, 'timestamp': '2025-10-01 04:16:35.128574', 'step': 3239, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:35.174401', 'step': 3239, 'epoch': 1} {'type': 'loss', 'content': 0.187522754073143, 'timestamp': '2025-10-01 04:16:35.202491', 'step': 3240, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:35.235999', 'step': 3240, 'epoch': 1} {'type': 'loss', 'content': 0.15638521313667297, 'timestamp': '2025-10-01 04:16:35.238029', 'step': 3241, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:35.281578', 'step': 3241, 'epoch': 1} {'type': 'loss', 'content': 0.23458485305309296, 'timestamp': '2025-10-01 04:16:35.283539', 'step': 3242, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:35.331045', 'step': 3242, 'epoch': 1} {'type': 'loss', 'content': 0.14403240382671356, 'timestamp': '2025-10-01 04:16:35.334028', 'step': 3243, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:35.368620', 'step': 3243, 'epoch': 1} {'type': 'loss', 'content': 0.22782625257968903, 'timestamp': '2025-10-01 04:16:35.392382', 'step': 3244, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:35.434388', 'step': 3244, 'epoch': 1} {'type': 'loss', 'content': 0.07993072271347046, 'timestamp': '2025-10-01 04:16:35.436294', 'step': 3245, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:35.495895', 'step': 3245, 'epoch': 1} {'type': 'loss', 'content': 0.21878281235694885, 'timestamp': '2025-10-01 04:16:35.498134', 'step': 3246, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:35.535671', 'step': 3246, 'epoch': 1} {'type': 'loss', 'content': 0.09598123282194138, 'timestamp': '2025-10-01 04:16:35.537789', 'step': 3247, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:16:35.571840', 'step': 3247, 'epoch': 1} {'type': 'loss', 'content': 0.20691034197807312, 'timestamp': '2025-10-01 04:16:35.595990', 'step': 3248, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:35.629321', 'step': 3248, 'epoch': 1} {'type': 'loss', 'content': 0.252275675535202, 'timestamp': '2025-10-01 04:16:35.632716', 'step': 3249, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:35.681628', 'step': 3249, 'epoch': 1} {'type': 'loss', 'content': 0.20905180275440216, 'timestamp': '2025-10-01 04:16:35.683899', 'step': 3250, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:35.719705', 'step': 3250, 'epoch': 1} {'type': 'loss', 'content': 0.1676291525363922, 'timestamp': '2025-10-01 04:16:35.724128', 'step': 3251, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:16:35.771290', 'step': 3251, 'epoch': 1} {'type': 'loss', 'content': 0.13979855179786682, 'timestamp': '2025-10-01 04:16:35.794891', 'step': 3252, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:16:35.841209', 'step': 3252, 'epoch': 1} {'type': 'loss', 'content': 0.12585529685020447, 'timestamp': '2025-10-01 04:16:35.843418', 'step': 3253, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:35.880225', 'step': 3253, 'epoch': 1} {'type': 'loss', 'content': 0.09556934237480164, 'timestamp': '2025-10-01 04:16:35.882281', 'step': 3254, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:35.919457', 'step': 3254, 'epoch': 1} {'type': 'loss', 'content': 0.1273082196712494, 'timestamp': '2025-10-01 04:16:35.921708', 'step': 3255, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:16:35.953532', 'step': 3255, 'epoch': 1} {'type': 'loss', 'content': 0.08069084584712982, 'timestamp': '2025-10-01 04:16:35.977213', 'step': 3256, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:16:36.010904', 'step': 3256, 'epoch': 1} {'type': 'loss', 'content': 0.16761650145053864, 'timestamp': '2025-10-01 04:16:36.013210', 'step': 3257, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:16:36.050941', 'step': 3257, 'epoch': 1} {'type': 'loss', 'content': 0.1731014996767044, 'timestamp': '2025-10-01 04:16:36.052897', 'step': 3258, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:36.091314', 'step': 3258, 'epoch': 1} {'type': 'loss', 'content': 0.20800897479057312, 'timestamp': '2025-10-01 04:16:36.094210', 'step': 3259, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:36.137889', 'step': 3259, 'epoch': 1} {'type': 'loss', 'content': 0.14611971378326416, 'timestamp': '2025-10-01 04:16:36.161151', 'step': 3260, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:36.198944', 'step': 3260, 'epoch': 1} {'type': 'loss', 'content': 0.19664056599140167, 'timestamp': '2025-10-01 04:16:36.200864', 'step': 3261, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:36.241048', 'step': 3261, 'epoch': 1} {'type': 'loss', 'content': 0.10982188582420349, 'timestamp': '2025-10-01 04:16:36.248762', 'step': 3262, 'epoch': 1} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 949202279808}], 'timestamp': '2025-10-01 04:16:48.279607', 'step': 3262, 'epoch': 1} {'type': 'pplx', 'content': 8932.396407157605, 'timestamp': '2025-10-01 04:16:48.282212', 'step': 3262, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:16:48.320685', 'step': 3262, 'epoch': 1} {'type': 'loss', 'content': 0.16803525388240814, 'timestamp': '2025-10-01 04:16:48.324102', 'step': 3263, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:16:48.373166', 'step': 3263, 'epoch': 1} {'type': 'loss', 'content': 0.1747589409351349, 'timestamp': '2025-10-01 04:16:48.398307', 'step': 3264, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:16:48.433127', 'step': 3264, 'epoch': 1} {'type': 'loss', 'content': 0.13352331519126892, 'timestamp': '2025-10-01 04:16:48.435191', 'step': 3265, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:48.466624', 'step': 3265, 'epoch': 1} {'type': 'loss', 'content': 0.2614007294178009, 'timestamp': '2025-10-01 04:16:48.468817', 'step': 3266, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:16:48.507860', 'step': 3266, 'epoch': 1} {'type': 'loss', 'content': 0.23166389763355255, 'timestamp': '2025-10-01 04:16:48.509861', 'step': 3267, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:16:48.541558', 'step': 3267, 'epoch': 1} {'type': 'loss', 'content': 0.1351425051689148, 'timestamp': '2025-10-01 04:16:48.564946', 'step': 3268, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:48.605088', 'step': 3268, 'epoch': 1} {'type': 'loss', 'content': 0.18659518659114838, 'timestamp': '2025-10-01 04:16:48.606989', 'step': 3269, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:16:48.637859', 'step': 3269, 'epoch': 1} {'type': 'loss', 'content': 0.17864029109477997, 'timestamp': '2025-10-01 04:16:48.640018', 'step': 3270, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:48.673315', 'step': 3270, 'epoch': 1} {'type': 'loss', 'content': 0.14478300511837006, 'timestamp': '2025-10-01 04:16:48.675144', 'step': 3271, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:16:48.715369', 'step': 3271, 'epoch': 1} {'type': 'loss', 'content': 0.08754656463861465, 'timestamp': '2025-10-01 04:16:48.738691', 'step': 3272, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:48.770690', 'step': 3272, 'epoch': 1} {'type': 'loss', 'content': 0.1738101989030838, 'timestamp': '2025-10-01 04:16:48.772547', 'step': 3273, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:48.804216', 'step': 3273, 'epoch': 1} {'type': 'loss', 'content': 0.17581616342067719, 'timestamp': '2025-10-01 04:16:48.806248', 'step': 3274, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:48.837818', 'step': 3274, 'epoch': 1} {'type': 'loss', 'content': 0.08358869701623917, 'timestamp': '2025-10-01 04:16:48.840059', 'step': 3275, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:48.878547', 'step': 3275, 'epoch': 1} {'type': 'loss', 'content': 0.15023471415042877, 'timestamp': '2025-10-01 04:16:48.901807', 'step': 3276, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:48.938057', 'step': 3276, 'epoch': 1} {'type': 'loss', 'content': 0.1721501350402832, 'timestamp': '2025-10-01 04:16:48.939970', 'step': 3277, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:48.979793', 'step': 3277, 'epoch': 1} {'type': 'loss', 'content': 0.09684614092111588, 'timestamp': '2025-10-01 04:16:48.981863', 'step': 3278, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:16:49.022400', 'step': 3278, 'epoch': 1} {'type': 'loss', 'content': 0.220942884683609, 'timestamp': '2025-10-01 04:16:49.024549', 'step': 3279, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:49.055799', 'step': 3279, 'epoch': 1} {'type': 'loss', 'content': 0.15521807968616486, 'timestamp': '2025-10-01 04:16:49.079464', 'step': 3280, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:16:49.130084', 'step': 3280, 'epoch': 1} {'type': 'loss', 'content': 0.12341725826263428, 'timestamp': '2025-10-01 04:16:49.133746', 'step': 3281, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:49.166156', 'step': 3281, 'epoch': 1} {'type': 'loss', 'content': 0.0629352256655693, 'timestamp': '2025-10-01 04:16:49.171223', 'step': 3282, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:49.212672', 'step': 3282, 'epoch': 1} {'type': 'loss', 'content': 0.21688593924045563, 'timestamp': '2025-10-01 04:16:49.214874', 'step': 3283, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:16:49.247526', 'step': 3283, 'epoch': 1} {'type': 'loss', 'content': 0.16816455125808716, 'timestamp': '2025-10-01 04:16:49.270887', 'step': 3284, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:16:49.304544', 'step': 3284, 'epoch': 1} {'type': 'loss', 'content': 0.2723899185657501, 'timestamp': '2025-10-01 04:16:49.307667', 'step': 3285, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:49.345322', 'step': 3285, 'epoch': 1} {'type': 'loss', 'content': 0.16540995240211487, 'timestamp': '2025-10-01 04:16:49.347283', 'step': 3286, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:49.383113', 'step': 3286, 'epoch': 1} {'type': 'loss', 'content': 0.14417919516563416, 'timestamp': '2025-10-01 04:16:49.386078', 'step': 3287, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:49.434065', 'step': 3287, 'epoch': 1} {'type': 'loss', 'content': 0.2055748850107193, 'timestamp': '2025-10-01 04:16:49.457163', 'step': 3288, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:16:49.503392', 'step': 3288, 'epoch': 1} {'type': 'loss', 'content': 0.16602736711502075, 'timestamp': '2025-10-01 04:16:49.505314', 'step': 3289, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:49.545137', 'step': 3289, 'epoch': 1} {'type': 'loss', 'content': 0.2808050811290741, 'timestamp': '2025-10-01 04:16:49.547097', 'step': 3290, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:49.579909', 'step': 3290, 'epoch': 1} {'type': 'loss', 'content': 0.17566251754760742, 'timestamp': '2025-10-01 04:16:49.581733', 'step': 3291, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:49.619719', 'step': 3291, 'epoch': 1} {'type': 'loss', 'content': 0.05815257504582405, 'timestamp': '2025-10-01 04:16:49.643449', 'step': 3292, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:16:49.681840', 'step': 3292, 'epoch': 1} {'type': 'loss', 'content': 0.26756733655929565, 'timestamp': '2025-10-01 04:16:49.684098', 'step': 3293, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:49.718629', 'step': 3293, 'epoch': 1} {'type': 'loss', 'content': 0.20370759069919586, 'timestamp': '2025-10-01 04:16:49.720912', 'step': 3294, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:16:49.752570', 'step': 3294, 'epoch': 1} {'type': 'loss', 'content': 0.14045056700706482, 'timestamp': '2025-10-01 04:16:49.754511', 'step': 3295, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:16:49.786582', 'step': 3295, 'epoch': 1} {'type': 'loss', 'content': 0.12400121241807938, 'timestamp': '2025-10-01 04:16:49.810435', 'step': 3296, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:16:49.841251', 'step': 3296, 'epoch': 1} {'type': 'loss', 'content': 0.13532061874866486, 'timestamp': '2025-10-01 04:16:49.843386', 'step': 3297, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:16:49.875042', 'step': 3297, 'epoch': 1} {'type': 'loss', 'content': 0.15392069518566132, 'timestamp': '2025-10-01 04:16:49.876902', 'step': 3298, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:49.911238', 'step': 3298, 'epoch': 1} {'type': 'loss', 'content': 0.09948157519102097, 'timestamp': '2025-10-01 04:16:49.913176', 'step': 3299, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:49.944669', 'step': 3299, 'epoch': 1} {'type': 'loss', 'content': 0.15866291522979736, 'timestamp': '2025-10-01 04:16:49.968237', 'step': 3300, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:49.999468', 'step': 3300, 'epoch': 1} {'type': 'loss', 'content': 0.11883017420768738, 'timestamp': '2025-10-01 04:16:50.001336', 'step': 3301, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:50.034008', 'step': 3301, 'epoch': 1} {'type': 'loss', 'content': 0.2647727429866791, 'timestamp': '2025-10-01 04:16:50.035749', 'step': 3302, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:50.069380', 'step': 3302, 'epoch': 1} {'type': 'loss', 'content': 0.20801851153373718, 'timestamp': '2025-10-01 04:16:50.072025', 'step': 3303, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:50.105162', 'step': 3303, 'epoch': 1} {'type': 'loss', 'content': 0.12439044564962387, 'timestamp': '2025-10-01 04:16:50.128410', 'step': 3304, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:16:50.164513', 'step': 3304, 'epoch': 1} {'type': 'loss', 'content': 0.25809434056282043, 'timestamp': '2025-10-01 04:16:50.166418', 'step': 3305, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:50.199921', 'step': 3305, 'epoch': 1} {'type': 'loss', 'content': 0.12386635690927505, 'timestamp': '2025-10-01 04:16:50.202008', 'step': 3306, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:50.236045', 'step': 3306, 'epoch': 1} {'type': 'loss', 'content': 0.1637200117111206, 'timestamp': '2025-10-01 04:16:50.237853', 'step': 3307, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:50.271595', 'step': 3307, 'epoch': 1} {'type': 'loss', 'content': 0.13823944330215454, 'timestamp': '2025-10-01 04:16:50.295315', 'step': 3308, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:50.329265', 'step': 3308, 'epoch': 1} {'type': 'loss', 'content': 0.16508500277996063, 'timestamp': '2025-10-01 04:16:50.331384', 'step': 3309, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:50.363356', 'step': 3309, 'epoch': 1} {'type': 'loss', 'content': 0.14746926724910736, 'timestamp': '2025-10-01 04:16:50.365613', 'step': 3310, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:50.405916', 'step': 3310, 'epoch': 1} {'type': 'loss', 'content': 0.1902327984571457, 'timestamp': '2025-10-01 04:16:50.407817', 'step': 3311, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:50.443815', 'step': 3311, 'epoch': 1} {'type': 'loss', 'content': 0.1730509102344513, 'timestamp': '2025-10-01 04:16:50.468733', 'step': 3312, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:50.511961', 'step': 3312, 'epoch': 1} {'type': 'loss', 'content': 0.1378750056028366, 'timestamp': '2025-10-01 04:16:50.514220', 'step': 3313, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:16:50.560424', 'step': 3313, 'epoch': 1} {'type': 'loss', 'content': 0.16863825917243958, 'timestamp': '2025-10-01 04:16:50.562832', 'step': 3314, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:16:50.596542', 'step': 3314, 'epoch': 1} {'type': 'loss', 'content': 0.14385122060775757, 'timestamp': '2025-10-01 04:16:50.599072', 'step': 3315, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:50.645682', 'step': 3315, 'epoch': 1} {'type': 'loss', 'content': 0.12432468682527542, 'timestamp': '2025-10-01 04:16:50.669068', 'step': 3316, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:16:50.712717', 'step': 3316, 'epoch': 1} {'type': 'loss', 'content': 0.12771600484848022, 'timestamp': '2025-10-01 04:16:50.722595', 'step': 3317, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:16:50.767363', 'step': 3317, 'epoch': 1} {'type': 'loss', 'content': 0.15356427431106567, 'timestamp': '2025-10-01 04:16:50.770308', 'step': 3318, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:50.804571', 'step': 3318, 'epoch': 1} {'type': 'loss', 'content': 0.18097162246704102, 'timestamp': '2025-10-01 04:16:50.807257', 'step': 3319, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:50.850109', 'step': 3319, 'epoch': 1} {'type': 'loss', 'content': 0.1444554328918457, 'timestamp': '2025-10-01 04:16:50.873604', 'step': 3320, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:16:50.912014', 'step': 3320, 'epoch': 1} {'type': 'loss', 'content': 0.20811456441879272, 'timestamp': '2025-10-01 04:16:50.913877', 'step': 3321, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:50.947730', 'step': 3321, 'epoch': 1} {'type': 'loss', 'content': 0.10646994411945343, 'timestamp': '2025-10-01 04:16:50.950044', 'step': 3322, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:16:50.984014', 'step': 3322, 'epoch': 1} {'type': 'loss', 'content': 0.08548936247825623, 'timestamp': '2025-10-01 04:16:50.988371', 'step': 3323, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:16:51.020732', 'step': 3323, 'epoch': 1} {'type': 'loss', 'content': 0.13083229959011078, 'timestamp': '2025-10-01 04:16:51.044399', 'step': 3324, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:16:51.080179', 'step': 3324, 'epoch': 1} {'type': 'loss', 'content': 0.2834569215774536, 'timestamp': '2025-10-01 04:16:51.082059', 'step': 3325, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:51.112689', 'step': 3325, 'epoch': 1} {'type': 'loss', 'content': 0.1351054459810257, 'timestamp': '2025-10-01 04:16:51.116320', 'step': 3326, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:51.155652', 'step': 3326, 'epoch': 1} {'type': 'loss', 'content': 0.21759837865829468, 'timestamp': '2025-10-01 04:16:51.157959', 'step': 3327, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-10-01 04:16:51.194560', 'step': 3327, 'epoch': 1} {'type': 'loss', 'content': 0.21075576543807983, 'timestamp': '2025-10-01 04:16:51.227035', 'step': 3328, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:51.262335', 'step': 3328, 'epoch': 1} {'type': 'loss', 'content': 0.21810132265090942, 'timestamp': '2025-10-01 04:16:51.264856', 'step': 3329, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:16:51.295958', 'step': 3329, 'epoch': 1} {'type': 'loss', 'content': 0.11805583536624908, 'timestamp': '2025-10-01 04:16:51.298238', 'step': 3330, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:16:51.334795', 'step': 3330, 'epoch': 1} {'type': 'loss', 'content': 0.08636409789323807, 'timestamp': '2025-10-01 04:16:51.337200', 'step': 3331, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:51.373172', 'step': 3331, 'epoch': 1} {'type': 'loss', 'content': 0.1923428177833557, 'timestamp': '2025-10-01 04:16:51.396553', 'step': 3332, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:16:51.434791', 'step': 3332, 'epoch': 1} {'type': 'loss', 'content': 0.10467424988746643, 'timestamp': '2025-10-01 04:16:51.436396', 'step': 3333, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:51.471181', 'step': 3333, 'epoch': 1} {'type': 'loss', 'content': 0.22419796884059906, 'timestamp': '2025-10-01 04:16:51.473246', 'step': 3334, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:16:51.508074', 'step': 3334, 'epoch': 1} {'type': 'loss', 'content': 0.14667579531669617, 'timestamp': '2025-10-01 04:16:51.509915', 'step': 3335, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:16:51.562281', 'step': 3335, 'epoch': 1} {'type': 'loss', 'content': 0.1768500804901123, 'timestamp': '2025-10-01 04:16:51.586256', 'step': 3336, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:51.619146', 'step': 3336, 'epoch': 1} {'type': 'loss', 'content': 0.11507885903120041, 'timestamp': '2025-10-01 04:16:51.621188', 'step': 3337, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:51.667793', 'step': 3337, 'epoch': 1} {'type': 'loss', 'content': 0.12847162783145905, 'timestamp': '2025-10-01 04:16:51.670247', 'step': 3338, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:51.704089', 'step': 3338, 'epoch': 1} {'type': 'loss', 'content': 0.28001517057418823, 'timestamp': '2025-10-01 04:16:51.715538', 'step': 3339, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 12814563338304}, 'timestamp': '2025-10-01 04:16:51.756310', 'step': 3339, 'epoch': 1} {'type': 'loss', 'content': 0.17789679765701294, 'timestamp': '2025-10-01 04:16:51.793578', 'step': 3340, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:51.826412', 'step': 3340, 'epoch': 1} {'type': 'loss', 'content': 0.22008880972862244, 'timestamp': '2025-10-01 04:16:51.829741', 'step': 3341, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:16:51.863350', 'step': 3341, 'epoch': 1} {'type': 'loss', 'content': 0.25690653920173645, 'timestamp': '2025-10-01 04:16:51.865352', 'step': 3342, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:51.903877', 'step': 3342, 'epoch': 1} {'type': 'loss', 'content': 0.16482722759246826, 'timestamp': '2025-10-01 04:16:51.906332', 'step': 3343, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:51.941721', 'step': 3343, 'epoch': 1} {'type': 'loss', 'content': 0.2268463522195816, 'timestamp': '2025-10-01 04:16:51.965116', 'step': 3344, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:16:52.000381', 'step': 3344, 'epoch': 1} {'type': 'loss', 'content': 0.15284839272499084, 'timestamp': '2025-10-01 04:16:52.010823', 'step': 3345, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:52.043691', 'step': 3345, 'epoch': 1} {'type': 'loss', 'content': 0.18418261408805847, 'timestamp': '2025-10-01 04:16:52.045858', 'step': 3346, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:52.079454', 'step': 3346, 'epoch': 1} {'type': 'loss', 'content': 0.16022609174251556, 'timestamp': '2025-10-01 04:16:52.082668', 'step': 3347, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:52.127858', 'step': 3347, 'epoch': 1} {'type': 'loss', 'content': 0.135879248380661, 'timestamp': '2025-10-01 04:16:52.151727', 'step': 3348, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:52.206873', 'step': 3348, 'epoch': 1} {'type': 'loss', 'content': 0.3112732172012329, 'timestamp': '2025-10-01 04:16:52.209125', 'step': 3349, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:16:52.254454', 'step': 3349, 'epoch': 1} {'type': 'loss', 'content': 0.19086743891239166, 'timestamp': '2025-10-01 04:16:52.256652', 'step': 3350, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:52.295041', 'step': 3350, 'epoch': 1} {'type': 'loss', 'content': 0.11892921477556229, 'timestamp': '2025-10-01 04:16:52.297515', 'step': 3351, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:52.330870', 'step': 3351, 'epoch': 1} {'type': 'loss', 'content': 0.15511180460453033, 'timestamp': '2025-10-01 04:16:52.354733', 'step': 3352, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:16:52.406842', 'step': 3352, 'epoch': 1} {'type': 'loss', 'content': 0.15221695601940155, 'timestamp': '2025-10-01 04:16:52.409776', 'step': 3353, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:52.445894', 'step': 3353, 'epoch': 1} {'type': 'loss', 'content': 0.13124671578407288, 'timestamp': '2025-10-01 04:16:52.447907', 'step': 3354, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:16:52.500285', 'step': 3354, 'epoch': 1} {'type': 'loss', 'content': 0.2310514599084854, 'timestamp': '2025-10-01 04:16:52.502482', 'step': 3355, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:16:52.548687', 'step': 3355, 'epoch': 1} {'type': 'loss', 'content': 0.20203803479671478, 'timestamp': '2025-10-01 04:16:52.572229', 'step': 3356, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:52.609453', 'step': 3356, 'epoch': 1} {'type': 'loss', 'content': 0.12981374561786652, 'timestamp': '2025-10-01 04:16:52.612047', 'step': 3357, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:16:52.653759', 'step': 3357, 'epoch': 1} {'type': 'loss', 'content': 0.13836823403835297, 'timestamp': '2025-10-01 04:16:52.656113', 'step': 3358, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:16:52.701657', 'step': 3358, 'epoch': 1} {'type': 'loss', 'content': 0.09902196377515793, 'timestamp': '2025-10-01 04:16:52.704210', 'step': 3359, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:52.738725', 'step': 3359, 'epoch': 1} {'type': 'loss', 'content': 0.19694975018501282, 'timestamp': '2025-10-01 04:16:52.765759', 'step': 3360, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:52.798405', 'step': 3360, 'epoch': 1} {'type': 'loss', 'content': 0.16217505931854248, 'timestamp': '2025-10-01 04:16:52.800264', 'step': 3361, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:52.831848', 'step': 3361, 'epoch': 1} {'type': 'loss', 'content': 0.16131193935871124, 'timestamp': '2025-10-01 04:16:52.833876', 'step': 3362, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:16:52.871228', 'step': 3362, 'epoch': 1} {'type': 'loss', 'content': 0.08745478093624115, 'timestamp': '2025-10-01 04:16:52.873142', 'step': 3363, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:52.911919', 'step': 3363, 'epoch': 1} {'type': 'loss', 'content': 0.17618927359580994, 'timestamp': '2025-10-01 04:16:52.935937', 'step': 3364, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:52.971755', 'step': 3364, 'epoch': 1} {'type': 'loss', 'content': 0.28941160440444946, 'timestamp': '2025-10-01 04:16:52.973931', 'step': 3365, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:16:53.015767', 'step': 3365, 'epoch': 1} {'type': 'loss', 'content': 0.11417968571186066, 'timestamp': '2025-10-01 04:16:53.018040', 'step': 3366, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:53.072387', 'step': 3366, 'epoch': 1} {'type': 'loss', 'content': 0.1880984753370285, 'timestamp': '2025-10-01 04:16:53.074361', 'step': 3367, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:16:53.110172', 'step': 3367, 'epoch': 1} {'type': 'loss', 'content': 0.16332276165485382, 'timestamp': '2025-10-01 04:16:53.135314', 'step': 3368, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:53.169290', 'step': 3368, 'epoch': 1} {'type': 'loss', 'content': 0.23653563857078552, 'timestamp': '2025-10-01 04:16:53.171311', 'step': 3369, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:16:53.206100', 'step': 3369, 'epoch': 1} {'type': 'loss', 'content': 0.11442437767982483, 'timestamp': '2025-10-01 04:16:53.208203', 'step': 3370, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:53.241425', 'step': 3370, 'epoch': 1} {'type': 'loss', 'content': 0.21049687266349792, 'timestamp': '2025-10-01 04:16:53.243211', 'step': 3371, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:53.279157', 'step': 3371, 'epoch': 1} {'type': 'loss', 'content': 0.15643012523651123, 'timestamp': '2025-10-01 04:16:53.302671', 'step': 3372, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:53.336802', 'step': 3372, 'epoch': 1} {'type': 'loss', 'content': 0.13661780953407288, 'timestamp': '2025-10-01 04:16:53.338740', 'step': 3373, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:16:53.370275', 'step': 3373, 'epoch': 1} {'type': 'loss', 'content': 0.15098658204078674, 'timestamp': '2025-10-01 04:16:53.373078', 'step': 3374, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:53.404841', 'step': 3374, 'epoch': 1} {'type': 'loss', 'content': 0.19778071343898773, 'timestamp': '2025-10-01 04:16:53.406738', 'step': 3375, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:53.439246', 'step': 3375, 'epoch': 1} {'type': 'loss', 'content': 0.24820075929164886, 'timestamp': '2025-10-01 04:16:53.462663', 'step': 3376, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:16:53.495126', 'step': 3376, 'epoch': 1} {'type': 'loss', 'content': 0.231611967086792, 'timestamp': '2025-10-01 04:16:53.496938', 'step': 3377, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:53.531951', 'step': 3377, 'epoch': 1} {'type': 'loss', 'content': 0.16178485751152039, 'timestamp': '2025-10-01 04:16:53.533817', 'step': 3378, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:53.564807', 'step': 3378, 'epoch': 1} {'type': 'loss', 'content': 0.1356896609067917, 'timestamp': '2025-10-01 04:16:53.566761', 'step': 3379, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:16:53.597512', 'step': 3379, 'epoch': 1} {'type': 'loss', 'content': 0.1733686923980713, 'timestamp': '2025-10-01 04:16:53.621016', 'step': 3380, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:16:53.652312', 'step': 3380, 'epoch': 1} {'type': 'loss', 'content': 0.21031807363033295, 'timestamp': '2025-10-01 04:16:53.654822', 'step': 3381, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:53.697224', 'step': 3381, 'epoch': 1} {'type': 'loss', 'content': 0.21864373981952667, 'timestamp': '2025-10-01 04:16:53.699384', 'step': 3382, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:53.732872', 'step': 3382, 'epoch': 1} {'type': 'loss', 'content': 0.16984984278678894, 'timestamp': '2025-10-01 04:16:53.735381', 'step': 3383, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:53.766162', 'step': 3383, 'epoch': 1} {'type': 'loss', 'content': 0.1348494440317154, 'timestamp': '2025-10-01 04:16:53.789660', 'step': 3384, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:53.821819', 'step': 3384, 'epoch': 1} {'type': 'loss', 'content': 0.19450925290584564, 'timestamp': '2025-10-01 04:16:53.824635', 'step': 3385, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:53.856658', 'step': 3385, 'epoch': 1} {'type': 'loss', 'content': 0.17202752828598022, 'timestamp': '2025-10-01 04:16:53.858789', 'step': 3386, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:53.892041', 'step': 3386, 'epoch': 1} {'type': 'loss', 'content': 0.12028342485427856, 'timestamp': '2025-10-01 04:16:53.893971', 'step': 3387, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:53.926112', 'step': 3387, 'epoch': 1} {'type': 'loss', 'content': 0.09469406306743622, 'timestamp': '2025-10-01 04:16:53.949592', 'step': 3388, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:53.985803', 'step': 3388, 'epoch': 1} {'type': 'loss', 'content': 0.1669403314590454, 'timestamp': '2025-10-01 04:16:53.987855', 'step': 3389, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:54.022264', 'step': 3389, 'epoch': 1} {'type': 'loss', 'content': 0.22656641900539398, 'timestamp': '2025-10-01 04:16:54.024230', 'step': 3390, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:54.055838', 'step': 3390, 'epoch': 1} {'type': 'loss', 'content': 0.18009041249752045, 'timestamp': '2025-10-01 04:16:54.057932', 'step': 3391, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:16:54.089875', 'step': 3391, 'epoch': 1} {'type': 'loss', 'content': 0.17069999873638153, 'timestamp': '2025-10-01 04:16:54.113575', 'step': 3392, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:54.145201', 'step': 3392, 'epoch': 1} {'type': 'loss', 'content': 0.11679059267044067, 'timestamp': '2025-10-01 04:16:54.147329', 'step': 3393, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:54.181481', 'step': 3393, 'epoch': 1} {'type': 'loss', 'content': 0.12326359003782272, 'timestamp': '2025-10-01 04:16:54.183627', 'step': 3394, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:54.216637', 'step': 3394, 'epoch': 1} {'type': 'loss', 'content': 0.10591187328100204, 'timestamp': '2025-10-01 04:16:54.218734', 'step': 3395, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:54.258930', 'step': 3395, 'epoch': 1} {'type': 'loss', 'content': 0.21041664481163025, 'timestamp': '2025-10-01 04:16:54.282814', 'step': 3396, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:54.322526', 'step': 3396, 'epoch': 1} {'type': 'loss', 'content': 0.17440944910049438, 'timestamp': '2025-10-01 04:16:54.324473', 'step': 3397, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:54.361459', 'step': 3397, 'epoch': 1} {'type': 'loss', 'content': 0.18148159980773926, 'timestamp': '2025-10-01 04:16:54.372029', 'step': 3398, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:54.407794', 'step': 3398, 'epoch': 1} {'type': 'loss', 'content': 0.2058156132698059, 'timestamp': '2025-10-01 04:16:54.415614', 'step': 3399, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:54.449580', 'step': 3399, 'epoch': 1} {'type': 'loss', 'content': 0.13421368598937988, 'timestamp': '2025-10-01 04:16:54.473012', 'step': 3400, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:54.509629', 'step': 3400, 'epoch': 1} {'type': 'loss', 'content': 0.1623706966638565, 'timestamp': '2025-10-01 04:16:54.511516', 'step': 3401, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:16:54.551072', 'step': 3401, 'epoch': 1} {'type': 'loss', 'content': 0.12637776136398315, 'timestamp': '2025-10-01 04:16:54.552989', 'step': 3402, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:16:54.585983', 'step': 3402, 'epoch': 1} {'type': 'loss', 'content': 0.2912006676197052, 'timestamp': '2025-10-01 04:16:54.588358', 'step': 3403, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:16:54.630693', 'step': 3403, 'epoch': 1} {'type': 'loss', 'content': 0.1899058073759079, 'timestamp': '2025-10-01 04:16:54.654543', 'step': 3404, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:54.697618', 'step': 3404, 'epoch': 1} {'type': 'loss', 'content': 0.1679849922657013, 'timestamp': '2025-10-01 04:16:54.699751', 'step': 3405, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:54.739940', 'step': 3405, 'epoch': 1} {'type': 'loss', 'content': 0.13583844900131226, 'timestamp': '2025-10-01 04:16:54.742021', 'step': 3406, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:16:54.775765', 'step': 3406, 'epoch': 1} {'type': 'loss', 'content': 0.19107139110565186, 'timestamp': '2025-10-01 04:16:54.778409', 'step': 3407, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:16:54.811109', 'step': 3407, 'epoch': 1} {'type': 'loss', 'content': 0.19721782207489014, 'timestamp': '2025-10-01 04:16:54.834630', 'step': 3408, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:54.869642', 'step': 3408, 'epoch': 1} {'type': 'loss', 'content': 0.11891229450702667, 'timestamp': '2025-10-01 04:16:54.871640', 'step': 3409, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:16:54.916213', 'step': 3409, 'epoch': 1} {'type': 'loss', 'content': 0.13334877789020538, 'timestamp': '2025-10-01 04:16:54.918146', 'step': 3410, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:16:54.951075', 'step': 3410, 'epoch': 1} {'type': 'loss', 'content': 0.0577240027487278, 'timestamp': '2025-10-01 04:16:54.953081', 'step': 3411, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:16:55.011919', 'step': 3411, 'epoch': 1} {'type': 'loss', 'content': 0.23808543384075165, 'timestamp': '2025-10-01 04:16:55.035641', 'step': 3412, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:55.080021', 'step': 3412, 'epoch': 1} {'type': 'loss', 'content': 0.17893050611019135, 'timestamp': '2025-10-01 04:16:55.081884', 'step': 3413, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:55.115886', 'step': 3413, 'epoch': 1} {'type': 'loss', 'content': 0.13922780752182007, 'timestamp': '2025-10-01 04:16:55.117650', 'step': 3414, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:55.154388', 'step': 3414, 'epoch': 1} {'type': 'loss', 'content': 0.12756703794002533, 'timestamp': '2025-10-01 04:16:55.156345', 'step': 3415, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:55.191015', 'step': 3415, 'epoch': 1} {'type': 'loss', 'content': 0.1818603128194809, 'timestamp': '2025-10-01 04:16:55.214578', 'step': 3416, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:55.265696', 'step': 3416, 'epoch': 1} {'type': 'loss', 'content': 0.12425453215837479, 'timestamp': '2025-10-01 04:16:55.267761', 'step': 3417, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:55.310911', 'step': 3417, 'epoch': 1} {'type': 'loss', 'content': 0.13524918258190155, 'timestamp': '2025-10-01 04:16:55.312902', 'step': 3418, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:55.345282', 'step': 3418, 'epoch': 1} {'type': 'loss', 'content': 0.13523469865322113, 'timestamp': '2025-10-01 04:16:55.347097', 'step': 3419, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:55.385090', 'step': 3419, 'epoch': 1} {'type': 'loss', 'content': 0.10087409615516663, 'timestamp': '2025-10-01 04:16:55.408594', 'step': 3420, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:55.444182', 'step': 3420, 'epoch': 1} {'type': 'loss', 'content': 0.2123887985944748, 'timestamp': '2025-10-01 04:16:55.446158', 'step': 3421, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:55.487422', 'step': 3421, 'epoch': 1} {'type': 'loss', 'content': 0.13096775114536285, 'timestamp': '2025-10-01 04:16:55.489418', 'step': 3422, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:16:55.520853', 'step': 3422, 'epoch': 1} {'type': 'loss', 'content': 0.15637527406215668, 'timestamp': '2025-10-01 04:16:55.522691', 'step': 3423, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:55.555387', 'step': 3423, 'epoch': 1} {'type': 'loss', 'content': 0.10097481310367584, 'timestamp': '2025-10-01 04:16:55.579386', 'step': 3424, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:55.619981', 'step': 3424, 'epoch': 1} {'type': 'loss', 'content': 0.1024855375289917, 'timestamp': '2025-10-01 04:16:55.621765', 'step': 3425, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:55.657701', 'step': 3425, 'epoch': 1} {'type': 'loss', 'content': 0.15356707572937012, 'timestamp': '2025-10-01 04:16:55.659391', 'step': 3426, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:16:55.692455', 'step': 3426, 'epoch': 1} {'type': 'loss', 'content': 0.2694600522518158, 'timestamp': '2025-10-01 04:16:55.696315', 'step': 3427, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:55.727212', 'step': 3427, 'epoch': 1} {'type': 'loss', 'content': 0.2757348120212555, 'timestamp': '2025-10-01 04:16:55.750852', 'step': 3428, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:16:55.786987', 'step': 3428, 'epoch': 1} {'type': 'loss', 'content': 0.17569680511951447, 'timestamp': '2025-10-01 04:16:55.788866', 'step': 3429, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:16:55.830409', 'step': 3429, 'epoch': 1} {'type': 'loss', 'content': 0.23362064361572266, 'timestamp': '2025-10-01 04:16:55.832397', 'step': 3430, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:55.874982', 'step': 3430, 'epoch': 1} {'type': 'loss', 'content': 0.15019379556179047, 'timestamp': '2025-10-01 04:16:55.877001', 'step': 3431, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:55.913435', 'step': 3431, 'epoch': 1} {'type': 'loss', 'content': 0.14710472524166107, 'timestamp': '2025-10-01 04:16:55.937053', 'step': 3432, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:55.981579', 'step': 3432, 'epoch': 1} {'type': 'loss', 'content': 0.08361213654279709, 'timestamp': '2025-10-01 04:16:55.983548', 'step': 3433, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:56.022579', 'step': 3433, 'epoch': 1} {'type': 'loss', 'content': 0.17911039292812347, 'timestamp': '2025-10-01 04:16:56.032642', 'step': 3434, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:56.063855', 'step': 3434, 'epoch': 1} {'type': 'loss', 'content': 0.17737837135791779, 'timestamp': '2025-10-01 04:16:56.065918', 'step': 3435, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:56.101678', 'step': 3435, 'epoch': 1} {'type': 'loss', 'content': 0.27280035614967346, 'timestamp': '2025-10-01 04:16:56.125075', 'step': 3436, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:16:56.158136', 'step': 3436, 'epoch': 1} {'type': 'loss', 'content': 0.10855191200971603, 'timestamp': '2025-10-01 04:16:56.160275', 'step': 3437, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:56.193133', 'step': 3437, 'epoch': 1} {'type': 'loss', 'content': 0.2561601400375366, 'timestamp': '2025-10-01 04:16:56.195075', 'step': 3438, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:16:56.226528', 'step': 3438, 'epoch': 1} {'type': 'loss', 'content': 0.12057371437549591, 'timestamp': '2025-10-01 04:16:56.228806', 'step': 3439, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:56.268278', 'step': 3439, 'epoch': 1} {'type': 'loss', 'content': 0.1666470170021057, 'timestamp': '2025-10-01 04:16:56.292135', 'step': 3440, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:16:56.334912', 'step': 3440, 'epoch': 1} {'type': 'loss', 'content': 0.12662935256958008, 'timestamp': '2025-10-01 04:16:56.336812', 'step': 3441, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:56.369472', 'step': 3441, 'epoch': 1} {'type': 'loss', 'content': 0.17524105310440063, 'timestamp': '2025-10-01 04:16:56.371413', 'step': 3442, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:56.404647', 'step': 3442, 'epoch': 1} {'type': 'loss', 'content': 0.1604519635438919, 'timestamp': '2025-10-01 04:16:56.406533', 'step': 3443, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:16:56.452686', 'step': 3443, 'epoch': 1} {'type': 'loss', 'content': 0.11124980449676514, 'timestamp': '2025-10-01 04:16:56.476007', 'step': 3444, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:16:56.514471', 'step': 3444, 'epoch': 1} {'type': 'loss', 'content': 0.1849173605442047, 'timestamp': '2025-10-01 04:16:56.516395', 'step': 3445, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:16:56.549609', 'step': 3445, 'epoch': 1} {'type': 'loss', 'content': 0.1516631841659546, 'timestamp': '2025-10-01 04:16:56.551893', 'step': 3446, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:56.586367', 'step': 3446, 'epoch': 1} {'type': 'loss', 'content': 0.17981915175914764, 'timestamp': '2025-10-01 04:16:56.588229', 'step': 3447, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:56.619173', 'step': 3447, 'epoch': 1} {'type': 'loss', 'content': 0.20151282846927643, 'timestamp': '2025-10-01 04:16:56.642743', 'step': 3448, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:16:56.674867', 'step': 3448, 'epoch': 1} {'type': 'loss', 'content': 0.13508719205856323, 'timestamp': '2025-10-01 04:16:56.676898', 'step': 3449, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:16:56.721309', 'step': 3449, 'epoch': 1} {'type': 'loss', 'content': 0.16892912983894348, 'timestamp': '2025-10-01 04:16:56.723724', 'step': 3450, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:56.768041', 'step': 3450, 'epoch': 1} {'type': 'loss', 'content': 0.20668430626392365, 'timestamp': '2025-10-01 04:16:56.769750', 'step': 3451, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:16:56.802372', 'step': 3451, 'epoch': 1} {'type': 'loss', 'content': 0.196768119931221, 'timestamp': '2025-10-01 04:16:56.826547', 'step': 3452, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:16:56.859502', 'step': 3452, 'epoch': 1} {'type': 'loss', 'content': 0.21222199499607086, 'timestamp': '2025-10-01 04:16:56.861750', 'step': 3453, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:16:56.893622', 'step': 3453, 'epoch': 1} {'type': 'loss', 'content': 0.18237575888633728, 'timestamp': '2025-10-01 04:16:56.896033', 'step': 3454, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:56.931164', 'step': 3454, 'epoch': 1} {'type': 'loss', 'content': 0.136239692568779, 'timestamp': '2025-10-01 04:16:56.933863', 'step': 3455, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:56.969369', 'step': 3455, 'epoch': 1} {'type': 'loss', 'content': 0.1906772404909134, 'timestamp': '2025-10-01 04:16:56.992988', 'step': 3456, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:57.033010', 'step': 3456, 'epoch': 1} {'type': 'loss', 'content': 0.07342872768640518, 'timestamp': '2025-10-01 04:16:57.035064', 'step': 3457, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:57.067925', 'step': 3457, 'epoch': 1} {'type': 'loss', 'content': 0.1196449026465416, 'timestamp': '2025-10-01 04:16:57.069951', 'step': 3458, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:57.129081', 'step': 3458, 'epoch': 1} {'type': 'loss', 'content': 0.19061683118343353, 'timestamp': '2025-10-01 04:16:57.131130', 'step': 3459, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:16:57.176747', 'step': 3459, 'epoch': 1} {'type': 'loss', 'content': 0.12809331715106964, 'timestamp': '2025-10-01 04:16:57.200258', 'step': 3460, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:57.234512', 'step': 3460, 'epoch': 1} {'type': 'loss', 'content': 0.1434154212474823, 'timestamp': '2025-10-01 04:16:57.236757', 'step': 3461, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:57.268692', 'step': 3461, 'epoch': 1} {'type': 'loss', 'content': 0.10326545685529709, 'timestamp': '2025-10-01 04:16:57.270537', 'step': 3462, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:57.317086', 'step': 3462, 'epoch': 1} {'type': 'loss', 'content': 0.1466376781463623, 'timestamp': '2025-10-01 04:16:57.319071', 'step': 3463, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:57.359175', 'step': 3463, 'epoch': 1} {'type': 'loss', 'content': 0.1334764063358307, 'timestamp': '2025-10-01 04:16:57.382669', 'step': 3464, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:57.415726', 'step': 3464, 'epoch': 1} {'type': 'loss', 'content': 0.1728598177433014, 'timestamp': '2025-10-01 04:16:57.417559', 'step': 3465, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:57.458332', 'step': 3465, 'epoch': 1} {'type': 'loss', 'content': 0.11936284601688385, 'timestamp': '2025-10-01 04:16:57.468659', 'step': 3466, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:57.507080', 'step': 3466, 'epoch': 1} {'type': 'loss', 'content': 0.20211516320705414, 'timestamp': '2025-10-01 04:16:57.509125', 'step': 3467, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:57.542355', 'step': 3467, 'epoch': 1} {'type': 'loss', 'content': 0.14998792111873627, 'timestamp': '2025-10-01 04:16:57.565935', 'step': 3468, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:57.605057', 'step': 3468, 'epoch': 1} {'type': 'loss', 'content': 0.1611449122428894, 'timestamp': '2025-10-01 04:16:57.607322', 'step': 3469, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:57.640495', 'step': 3469, 'epoch': 1} {'type': 'loss', 'content': 0.17500156164169312, 'timestamp': '2025-10-01 04:16:57.642422', 'step': 3470, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:16:57.673638', 'step': 3470, 'epoch': 1} {'type': 'loss', 'content': 0.23974767327308655, 'timestamp': '2025-10-01 04:16:57.675900', 'step': 3471, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:57.707114', 'step': 3471, 'epoch': 1} {'type': 'loss', 'content': 0.14299215376377106, 'timestamp': '2025-10-01 04:16:57.730661', 'step': 3472, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:16:57.764400', 'step': 3472, 'epoch': 1} {'type': 'loss', 'content': 0.1452416628599167, 'timestamp': '2025-10-01 04:16:57.766475', 'step': 3473, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:16:57.797021', 'step': 3473, 'epoch': 1} {'type': 'loss', 'content': 0.18776585161685944, 'timestamp': '2025-10-01 04:16:57.799119', 'step': 3474, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:16:57.830914', 'step': 3474, 'epoch': 1} {'type': 'loss', 'content': 0.15304823219776154, 'timestamp': '2025-10-01 04:16:57.833409', 'step': 3475, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:57.863055', 'step': 3475, 'epoch': 1} {'type': 'loss', 'content': 0.26032787561416626, 'timestamp': '2025-10-01 04:16:57.886374', 'step': 3476, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:57.917486', 'step': 3476, 'epoch': 1} {'type': 'loss', 'content': 0.1652776300907135, 'timestamp': '2025-10-01 04:16:57.919442', 'step': 3477, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:57.949688', 'step': 3477, 'epoch': 1} {'type': 'loss', 'content': 0.15106835961341858, 'timestamp': '2025-10-01 04:16:57.951793', 'step': 3478, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:57.982551', 'step': 3478, 'epoch': 1} {'type': 'loss', 'content': 0.16506600379943848, 'timestamp': '2025-10-01 04:16:57.984689', 'step': 3479, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:58.019334', 'step': 3479, 'epoch': 1} {'type': 'loss', 'content': 0.1959732174873352, 'timestamp': '2025-10-01 04:16:58.042740', 'step': 3480, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:16:58.075088', 'step': 3480, 'epoch': 1} {'type': 'loss', 'content': 0.15406261384487152, 'timestamp': '2025-10-01 04:16:58.077029', 'step': 3481, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:16:58.107390', 'step': 3481, 'epoch': 1} {'type': 'loss', 'content': 0.2384842038154602, 'timestamp': '2025-10-01 04:16:58.109500', 'step': 3482, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:16:58.142041', 'step': 3482, 'epoch': 1} {'type': 'loss', 'content': 0.19426988065242767, 'timestamp': '2025-10-01 04:16:58.144386', 'step': 3483, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:58.180736', 'step': 3483, 'epoch': 1} {'type': 'loss', 'content': 0.15969377756118774, 'timestamp': '2025-10-01 04:16:58.204381', 'step': 3484, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:58.236270', 'step': 3484, 'epoch': 1} {'type': 'loss', 'content': 0.14372402429580688, 'timestamp': '2025-10-01 04:16:58.238223', 'step': 3485, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:58.268830', 'step': 3485, 'epoch': 1} {'type': 'loss', 'content': 0.11654944717884064, 'timestamp': '2025-10-01 04:16:58.271387', 'step': 3486, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:58.305805', 'step': 3486, 'epoch': 1} {'type': 'loss', 'content': 0.17719131708145142, 'timestamp': '2025-10-01 04:16:58.308198', 'step': 3487, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:16:58.340700', 'step': 3487, 'epoch': 1} {'type': 'loss', 'content': 0.14913351833820343, 'timestamp': '2025-10-01 04:16:58.364729', 'step': 3488, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:58.400807', 'step': 3488, 'epoch': 1} {'type': 'loss', 'content': 0.19474194943904877, 'timestamp': '2025-10-01 04:16:58.402842', 'step': 3489, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:16:58.443040', 'step': 3489, 'epoch': 1} {'type': 'loss', 'content': 0.13988536596298218, 'timestamp': '2025-10-01 04:16:58.445371', 'step': 3490, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:58.477721', 'step': 3490, 'epoch': 1} {'type': 'loss', 'content': 0.1961861103773117, 'timestamp': '2025-10-01 04:16:58.479878', 'step': 3491, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:58.511344', 'step': 3491, 'epoch': 1} {'type': 'loss', 'content': 0.1470637172460556, 'timestamp': '2025-10-01 04:16:58.534775', 'step': 3492, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:58.575216', 'step': 3492, 'epoch': 1} {'type': 'loss', 'content': 0.0929521918296814, 'timestamp': '2025-10-01 04:16:58.577192', 'step': 3493, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:58.609685', 'step': 3493, 'epoch': 1} {'type': 'loss', 'content': 0.1920318305492401, 'timestamp': '2025-10-01 04:16:58.611952', 'step': 3494, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:16:58.641724', 'step': 3494, 'epoch': 1} {'type': 'loss', 'content': 0.1414436399936676, 'timestamp': '2025-10-01 04:16:58.643767', 'step': 3495, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:16:58.674652', 'step': 3495, 'epoch': 1} {'type': 'loss', 'content': 0.35991647839546204, 'timestamp': '2025-10-01 04:16:58.698047', 'step': 3496, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-10-01 04:16:58.728795', 'step': 3496, 'epoch': 1} {'type': 'loss', 'content': 0.10714345425367355, 'timestamp': '2025-10-01 04:16:58.731181', 'step': 3497, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:58.775402', 'step': 3497, 'epoch': 1} {'type': 'loss', 'content': 0.18372122943401337, 'timestamp': '2025-10-01 04:16:58.777451', 'step': 3498, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:16:58.817934', 'step': 3498, 'epoch': 1} {'type': 'loss', 'content': 0.1254107505083084, 'timestamp': '2025-10-01 04:16:58.819902', 'step': 3499, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:16:58.850518', 'step': 3499, 'epoch': 1} {'type': 'loss', 'content': 0.09804535657167435, 'timestamp': '2025-10-01 04:16:58.873834', 'step': 3500, 'epoch': 1} {'type': 'info', 'content': 'Checkpoint saved at step 3500', 'timestamp': '2025-10-01 04:17:03.767856', 'step': 3500, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:03.800924', 'step': 3500, 'epoch': 1} {'type': 'loss', 'content': 0.1997048407793045, 'timestamp': '2025-10-01 04:17:03.803006', 'step': 3501, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:17:03.837010', 'step': 3501, 'epoch': 1} {'type': 'loss', 'content': 0.11803118139505386, 'timestamp': '2025-10-01 04:17:03.838946', 'step': 3502, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:17:03.873675', 'step': 3502, 'epoch': 1} {'type': 'loss', 'content': 0.12040027976036072, 'timestamp': '2025-10-01 04:17:03.875753', 'step': 3503, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:17:03.906983', 'step': 3503, 'epoch': 1} {'type': 'loss', 'content': 0.19532494246959686, 'timestamp': '2025-10-01 04:17:03.930525', 'step': 3504, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:17:03.967946', 'step': 3504, 'epoch': 1} {'type': 'loss', 'content': 0.13684216141700745, 'timestamp': '2025-10-01 04:17:03.969791', 'step': 3505, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:17:04.001091', 'step': 3505, 'epoch': 1} {'type': 'loss', 'content': 0.1279982328414917, 'timestamp': '2025-10-01 04:17:04.003507', 'step': 3506, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:04.036018', 'step': 3506, 'epoch': 1} {'type': 'loss', 'content': 0.19733388721942902, 'timestamp': '2025-10-01 04:17:04.037892', 'step': 3507, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:17:04.071219', 'step': 3507, 'epoch': 1} {'type': 'loss', 'content': 0.12788186967372894, 'timestamp': '2025-10-01 04:17:04.094808', 'step': 3508, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:17:04.127904', 'step': 3508, 'epoch': 1} {'type': 'loss', 'content': 0.132017120718956, 'timestamp': '2025-10-01 04:17:04.129749', 'step': 3509, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:04.162588', 'step': 3509, 'epoch': 1} {'type': 'loss', 'content': 0.1911393404006958, 'timestamp': '2025-10-01 04:17:04.164559', 'step': 3510, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:04.195133', 'step': 3510, 'epoch': 1} {'type': 'loss', 'content': 0.23901152610778809, 'timestamp': '2025-10-01 04:17:04.197773', 'step': 3511, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:17:04.228286', 'step': 3511, 'epoch': 1} {'type': 'loss', 'content': 0.16554813086986542, 'timestamp': '2025-10-01 04:17:04.251927', 'step': 3512, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:17:04.282531', 'step': 3512, 'epoch': 1} {'type': 'loss', 'content': 0.20216043293476105, 'timestamp': '2025-10-01 04:17:04.284429', 'step': 3513, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:17:04.315105', 'step': 3513, 'epoch': 1} {'type': 'loss', 'content': 0.17808517813682556, 'timestamp': '2025-10-01 04:17:04.317220', 'step': 3514, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:04.355315', 'step': 3514, 'epoch': 1} {'type': 'loss', 'content': 0.28012436628341675, 'timestamp': '2025-10-01 04:17:04.357329', 'step': 3515, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:04.396056', 'step': 3515, 'epoch': 1} {'type': 'loss', 'content': 0.12817153334617615, 'timestamp': '2025-10-01 04:17:04.419278', 'step': 3516, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:04.454173', 'step': 3516, 'epoch': 1} {'type': 'loss', 'content': 0.15616385638713837, 'timestamp': '2025-10-01 04:17:04.456113', 'step': 3517, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:17:04.490104', 'step': 3517, 'epoch': 1} {'type': 'loss', 'content': 0.14720848202705383, 'timestamp': '2025-10-01 04:17:04.492894', 'step': 3518, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:17:04.525694', 'step': 3518, 'epoch': 1} {'type': 'loss', 'content': 0.2082635462284088, 'timestamp': '2025-10-01 04:17:04.528071', 'step': 3519, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:17:04.570951', 'step': 3519, 'epoch': 1} {'type': 'loss', 'content': 0.20506466925144196, 'timestamp': '2025-10-01 04:17:04.594170', 'step': 3520, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:17:04.640115', 'step': 3520, 'epoch': 1} {'type': 'loss', 'content': 0.18017098307609558, 'timestamp': '2025-10-01 04:17:04.641753', 'step': 3521, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:04.671908', 'step': 3521, 'epoch': 1} {'type': 'loss', 'content': 0.11534471809864044, 'timestamp': '2025-10-01 04:17:04.673741', 'step': 3522, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:17:04.714373', 'step': 3522, 'epoch': 1} {'type': 'loss', 'content': 0.20135222375392914, 'timestamp': '2025-10-01 04:17:04.716619', 'step': 3523, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:17:04.751236', 'step': 3523, 'epoch': 1} {'type': 'loss', 'content': 0.12044306099414825, 'timestamp': '2025-10-01 04:17:04.780669', 'step': 3524, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-10-01 04:17:04.810740', 'step': 3524, 'epoch': 1} {'type': 'loss', 'content': 0.12311577796936035, 'timestamp': '2025-10-01 04:17:04.813222', 'step': 3525, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:04.849313', 'step': 3525, 'epoch': 1} {'type': 'loss', 'content': 0.15621022880077362, 'timestamp': '2025-10-01 04:17:04.851387', 'step': 3526, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:17:04.889339', 'step': 3526, 'epoch': 1} {'type': 'loss', 'content': 0.2625824511051178, 'timestamp': '2025-10-01 04:17:04.891422', 'step': 3527, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:17:04.931984', 'step': 3527, 'epoch': 1} {'type': 'loss', 'content': 0.22704526782035828, 'timestamp': '2025-10-01 04:17:04.955762', 'step': 3528, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:04.990637', 'step': 3528, 'epoch': 1} {'type': 'loss', 'content': 0.14276771247386932, 'timestamp': '2025-10-01 04:17:04.992578', 'step': 3529, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:05.030541', 'step': 3529, 'epoch': 1} {'type': 'loss', 'content': 0.2075689435005188, 'timestamp': '2025-10-01 04:17:05.032394', 'step': 3530, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:17:05.065951', 'step': 3530, 'epoch': 1} {'type': 'loss', 'content': 0.2276296615600586, 'timestamp': '2025-10-01 04:17:05.067858', 'step': 3531, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:05.099127', 'step': 3531, 'epoch': 1} {'type': 'loss', 'content': 0.13188521564006805, 'timestamp': '2025-10-01 04:17:05.122567', 'step': 3532, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:17:05.155044', 'step': 3532, 'epoch': 1} {'type': 'loss', 'content': 0.21624448895454407, 'timestamp': '2025-10-01 04:17:05.156926', 'step': 3533, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:17:05.187437', 'step': 3533, 'epoch': 1} {'type': 'loss', 'content': 0.19339808821678162, 'timestamp': '2025-10-01 04:17:05.190299', 'step': 3534, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:17:05.220810', 'step': 3534, 'epoch': 1} {'type': 'loss', 'content': 0.12413926422595978, 'timestamp': '2025-10-01 04:17:05.224283', 'step': 3535, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:17:05.254517', 'step': 3535, 'epoch': 1} {'type': 'loss', 'content': 0.18333986401557922, 'timestamp': '2025-10-01 04:17:05.278188', 'step': 3536, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:17:05.310531', 'step': 3536, 'epoch': 1} {'type': 'loss', 'content': 0.1716243326663971, 'timestamp': '2025-10-01 04:17:05.312422', 'step': 3537, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:17:05.342900', 'step': 3537, 'epoch': 1} {'type': 'loss', 'content': 0.07708840072154999, 'timestamp': '2025-10-01 04:17:05.344811', 'step': 3538, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:17:05.378429', 'step': 3538, 'epoch': 1} {'type': 'loss', 'content': 0.2356712520122528, 'timestamp': '2025-10-01 04:17:05.380335', 'step': 3539, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:05.417790', 'step': 3539, 'epoch': 1} {'type': 'loss', 'content': 0.23061741888523102, 'timestamp': '2025-10-01 04:17:05.441891', 'step': 3540, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:17:05.473979', 'step': 3540, 'epoch': 1} {'type': 'loss', 'content': 0.25315672159194946, 'timestamp': '2025-10-01 04:17:05.476375', 'step': 3541, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:17:05.511345', 'step': 3541, 'epoch': 1} {'type': 'loss', 'content': 0.2119540125131607, 'timestamp': '2025-10-01 04:17:05.513179', 'step': 3542, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:17:05.549126', 'step': 3542, 'epoch': 1} {'type': 'loss', 'content': 0.178627148270607, 'timestamp': '2025-10-01 04:17:05.551068', 'step': 3543, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:05.584789', 'step': 3543, 'epoch': 1} {'type': 'loss', 'content': 0.13360372185707092, 'timestamp': '2025-10-01 04:17:05.608198', 'step': 3544, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:05.644686', 'step': 3544, 'epoch': 1} {'type': 'loss', 'content': 0.1978808492422104, 'timestamp': '2025-10-01 04:17:05.646629', 'step': 3545, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:05.680762', 'step': 3545, 'epoch': 1} {'type': 'loss', 'content': 0.2368416041135788, 'timestamp': '2025-10-01 04:17:05.682668', 'step': 3546, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:17:05.716149', 'step': 3546, 'epoch': 1} {'type': 'loss', 'content': 0.16440637409687042, 'timestamp': '2025-10-01 04:17:05.720358', 'step': 3547, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:17:05.754984', 'step': 3547, 'epoch': 1} {'type': 'loss', 'content': 0.09351532906293869, 'timestamp': '2025-10-01 04:17:05.778384', 'step': 3548, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:17:05.808320', 'step': 3548, 'epoch': 1} {'type': 'loss', 'content': 0.139304056763649, 'timestamp': '2025-10-01 04:17:05.810297', 'step': 3549, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:05.840316', 'step': 3549, 'epoch': 1} {'type': 'loss', 'content': 0.20468448102474213, 'timestamp': '2025-10-01 04:17:05.842776', 'step': 3550, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:17:05.873244', 'step': 3550, 'epoch': 1} {'type': 'loss', 'content': 0.10817383229732513, 'timestamp': '2025-10-01 04:17:05.875245', 'step': 3551, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:17:05.913845', 'step': 3551, 'epoch': 1} {'type': 'loss', 'content': 0.10590890794992447, 'timestamp': '2025-10-01 04:17:05.936914', 'step': 3552, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:05.969126', 'step': 3552, 'epoch': 1} {'type': 'loss', 'content': 0.0999731793999672, 'timestamp': '2025-10-01 04:17:05.971012', 'step': 3553, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:17:06.003416', 'step': 3553, 'epoch': 1} {'type': 'loss', 'content': 0.10976587235927582, 'timestamp': '2025-10-01 04:17:06.005856', 'step': 3554, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:17:06.038313', 'step': 3554, 'epoch': 1} {'type': 'loss', 'content': 0.1632368415594101, 'timestamp': '2025-10-01 04:17:06.040812', 'step': 3555, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:17:06.088810', 'step': 3555, 'epoch': 1} {'type': 'loss', 'content': 0.18172495067119598, 'timestamp': '2025-10-01 04:17:06.112561', 'step': 3556, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:17:06.146048', 'step': 3556, 'epoch': 1} {'type': 'loss', 'content': 0.13950662314891815, 'timestamp': '2025-10-01 04:17:06.147937', 'step': 3557, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:06.179990', 'step': 3557, 'epoch': 1} {'type': 'loss', 'content': 0.18491089344024658, 'timestamp': '2025-10-01 04:17:06.183149', 'step': 3558, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:06.215796', 'step': 3558, 'epoch': 1} {'type': 'loss', 'content': 0.23668158054351807, 'timestamp': '2025-10-01 04:17:06.217616', 'step': 3559, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:06.247415', 'step': 3559, 'epoch': 1} {'type': 'loss', 'content': 0.08073543012142181, 'timestamp': '2025-10-01 04:17:06.270767', 'step': 3560, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:17:06.306530', 'step': 3560, 'epoch': 1} {'type': 'loss', 'content': 0.22121156752109528, 'timestamp': '2025-10-01 04:17:06.308413', 'step': 3561, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:17:06.340265', 'step': 3561, 'epoch': 1} {'type': 'loss', 'content': 0.19928570091724396, 'timestamp': '2025-10-01 04:17:06.342453', 'step': 3562, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:17:06.377528', 'step': 3562, 'epoch': 1} {'type': 'loss', 'content': 0.1323130875825882, 'timestamp': '2025-10-01 04:17:06.379499', 'step': 3563, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:06.413501', 'step': 3563, 'epoch': 1} {'type': 'loss', 'content': 0.1644466519355774, 'timestamp': '2025-10-01 04:17:06.438381', 'step': 3564, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:17:06.471694', 'step': 3564, 'epoch': 1} {'type': 'loss', 'content': 0.1416545808315277, 'timestamp': '2025-10-01 04:17:06.473588', 'step': 3565, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:17:06.510612', 'step': 3565, 'epoch': 1} {'type': 'loss', 'content': 0.24103283882141113, 'timestamp': '2025-10-01 04:17:06.513065', 'step': 3566, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:17:06.548560', 'step': 3566, 'epoch': 1} {'type': 'loss', 'content': 0.07201701402664185, 'timestamp': '2025-10-01 04:17:06.550485', 'step': 3567, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:17:06.581607', 'step': 3567, 'epoch': 1} {'type': 'loss', 'content': 0.12396183609962463, 'timestamp': '2025-10-01 04:17:06.605828', 'step': 3568, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:17:06.639933', 'step': 3568, 'epoch': 1} {'type': 'loss', 'content': 0.20274819433689117, 'timestamp': '2025-10-01 04:17:06.642040', 'step': 3569, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:17:06.674306', 'step': 3569, 'epoch': 1} {'type': 'loss', 'content': 0.20681342482566833, 'timestamp': '2025-10-01 04:17:06.676446', 'step': 3570, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:17:06.709395', 'step': 3570, 'epoch': 1} {'type': 'loss', 'content': 0.1675727665424347, 'timestamp': '2025-10-01 04:17:06.712280', 'step': 3571, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:17:06.743984', 'step': 3571, 'epoch': 1} {'type': 'loss', 'content': 0.16265080869197845, 'timestamp': '2025-10-01 04:17:06.767446', 'step': 3572, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:17:06.798846', 'step': 3572, 'epoch': 1} {'type': 'loss', 'content': 0.15432772040367126, 'timestamp': '2025-10-01 04:17:06.800742', 'step': 3573, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:06.830882', 'step': 3573, 'epoch': 1} {'type': 'loss', 'content': 0.22669708728790283, 'timestamp': '2025-10-01 04:17:06.832959', 'step': 3574, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:17:06.863993', 'step': 3574, 'epoch': 1} {'type': 'loss', 'content': 0.1651470959186554, 'timestamp': '2025-10-01 04:17:06.866169', 'step': 3575, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:17:06.897232', 'step': 3575, 'epoch': 1} {'type': 'loss', 'content': 0.24318218231201172, 'timestamp': '2025-10-01 04:17:06.920695', 'step': 3576, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:17:06.951868', 'step': 3576, 'epoch': 1} {'type': 'loss', 'content': 0.248616561293602, 'timestamp': '2025-10-01 04:17:06.953792', 'step': 3577, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:17:06.984429', 'step': 3577, 'epoch': 1} {'type': 'loss', 'content': 0.19654399156570435, 'timestamp': '2025-10-01 04:17:06.986443', 'step': 3578, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:17:07.022384', 'step': 3578, 'epoch': 1} {'type': 'loss', 'content': 0.08362482488155365, 'timestamp': '2025-10-01 04:17:07.024276', 'step': 3579, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:17:07.054592', 'step': 3579, 'epoch': 1} {'type': 'loss', 'content': 0.08477333933115005, 'timestamp': '2025-10-01 04:17:07.078007', 'step': 3580, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:07.111945', 'step': 3580, 'epoch': 1} {'type': 'loss', 'content': 0.10769496113061905, 'timestamp': '2025-10-01 04:17:07.113890', 'step': 3581, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:17:07.153191', 'step': 3581, 'epoch': 1} {'type': 'loss', 'content': 0.1405743956565857, 'timestamp': '2025-10-01 04:17:07.155771', 'step': 3582, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:17:07.187265', 'step': 3582, 'epoch': 1} {'type': 'loss', 'content': 0.16281256079673767, 'timestamp': '2025-10-01 04:17:07.189295', 'step': 3583, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:17:07.220266', 'step': 3583, 'epoch': 1} {'type': 'loss', 'content': 0.1498733013868332, 'timestamp': '2025-10-01 04:17:07.243821', 'step': 3584, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:17:07.279201', 'step': 3584, 'epoch': 1} {'type': 'loss', 'content': 0.2059791386127472, 'timestamp': '2025-10-01 04:17:07.281377', 'step': 3585, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:07.313540', 'step': 3585, 'epoch': 1} {'type': 'loss', 'content': 0.29128777980804443, 'timestamp': '2025-10-01 04:17:07.315574', 'step': 3586, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:17:07.346572', 'step': 3586, 'epoch': 1} {'type': 'loss', 'content': 0.12467482686042786, 'timestamp': '2025-10-01 04:17:07.348496', 'step': 3587, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:17:07.385404', 'step': 3587, 'epoch': 1} {'type': 'loss', 'content': 0.13666585087776184, 'timestamp': '2025-10-01 04:17:07.409348', 'step': 3588, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:07.441167', 'step': 3588, 'epoch': 1} {'type': 'loss', 'content': 0.14865325391292572, 'timestamp': '2025-10-01 04:17:07.443215', 'step': 3589, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:07.483004', 'step': 3589, 'epoch': 1} {'type': 'loss', 'content': 0.21517613530158997, 'timestamp': '2025-10-01 04:17:07.484915', 'step': 3590, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:07.525730', 'step': 3590, 'epoch': 1} {'type': 'loss', 'content': 0.10702019929885864, 'timestamp': '2025-10-01 04:17:07.527527', 'step': 3591, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:17:07.562440', 'step': 3591, 'epoch': 1} {'type': 'loss', 'content': 0.12854762375354767, 'timestamp': '2025-10-01 04:17:07.585678', 'step': 3592, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:17:07.619260', 'step': 3592, 'epoch': 1} {'type': 'loss', 'content': 0.1505214422941208, 'timestamp': '2025-10-01 04:17:07.621555', 'step': 3593, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:07.654940', 'step': 3593, 'epoch': 1} {'type': 'loss', 'content': 0.1890382319688797, 'timestamp': '2025-10-01 04:17:07.656818', 'step': 3594, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:17:07.692995', 'step': 3594, 'epoch': 1} {'type': 'loss', 'content': 0.1402234137058258, 'timestamp': '2025-10-01 04:17:07.695004', 'step': 3595, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:17:07.731513', 'step': 3595, 'epoch': 1} {'type': 'loss', 'content': 0.09358852356672287, 'timestamp': '2025-10-01 04:17:07.754929', 'step': 3596, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:17:07.787270', 'step': 3596, 'epoch': 1} {'type': 'loss', 'content': 0.20894084870815277, 'timestamp': '2025-10-01 04:17:07.789235', 'step': 3597, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:17:07.830826', 'step': 3597, 'epoch': 1} {'type': 'loss', 'content': 0.10573378950357437, 'timestamp': '2025-10-01 04:17:07.833634', 'step': 3598, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:07.866877', 'step': 3598, 'epoch': 1} {'type': 'loss', 'content': 0.13940107822418213, 'timestamp': '2025-10-01 04:17:07.868653', 'step': 3599, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:17:07.900627', 'step': 3599, 'epoch': 1} {'type': 'loss', 'content': 0.22997917234897614, 'timestamp': '2025-10-01 04:17:07.924182', 'step': 3600, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:07.964035', 'step': 3600, 'epoch': 1} {'type': 'loss', 'content': 0.1854115128517151, 'timestamp': '2025-10-01 04:17:07.965831', 'step': 3601, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:07.996370', 'step': 3601, 'epoch': 1} {'type': 'loss', 'content': 0.12206962704658508, 'timestamp': '2025-10-01 04:17:07.998090', 'step': 3602, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:17:08.033020', 'step': 3602, 'epoch': 1} {'type': 'loss', 'content': 0.1067458912730217, 'timestamp': '2025-10-01 04:17:08.034772', 'step': 3603, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:17:08.066408', 'step': 3603, 'epoch': 1} {'type': 'loss', 'content': 0.17885203659534454, 'timestamp': '2025-10-01 04:17:08.089708', 'step': 3604, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:08.120860', 'step': 3604, 'epoch': 1} {'type': 'loss', 'content': 0.09536555409431458, 'timestamp': '2025-10-01 04:17:08.122779', 'step': 3605, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:17:08.153645', 'step': 3605, 'epoch': 1} {'type': 'loss', 'content': 0.16113542020320892, 'timestamp': '2025-10-01 04:17:08.156037', 'step': 3606, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:08.189564', 'step': 3606, 'epoch': 1} {'type': 'loss', 'content': 0.2640971839427948, 'timestamp': '2025-10-01 04:17:08.192332', 'step': 3607, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:08.228035', 'step': 3607, 'epoch': 1} {'type': 'loss', 'content': 0.23933027684688568, 'timestamp': '2025-10-01 04:17:08.251426', 'step': 3608, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:08.281429', 'step': 3608, 'epoch': 1} {'type': 'loss', 'content': 0.16268862783908844, 'timestamp': '2025-10-01 04:17:08.283450', 'step': 3609, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:17:08.315564', 'step': 3609, 'epoch': 1} {'type': 'loss', 'content': 0.14120134711265564, 'timestamp': '2025-10-01 04:17:08.317501', 'step': 3610, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:08.349496', 'step': 3610, 'epoch': 1} {'type': 'loss', 'content': 0.26291418075561523, 'timestamp': '2025-10-01 04:17:08.351523', 'step': 3611, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:17:08.384283', 'step': 3611, 'epoch': 1} {'type': 'loss', 'content': 0.1964695304632187, 'timestamp': '2025-10-01 04:17:08.407890', 'step': 3612, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:17:08.439111', 'step': 3612, 'epoch': 1} {'type': 'loss', 'content': 0.1476106196641922, 'timestamp': '2025-10-01 04:17:08.444389', 'step': 3613, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:08.475060', 'step': 3613, 'epoch': 1} {'type': 'loss', 'content': 0.19054585695266724, 'timestamp': '2025-10-01 04:17:08.477435', 'step': 3614, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:17:08.510057', 'step': 3614, 'epoch': 1} {'type': 'loss', 'content': 0.10520433634519577, 'timestamp': '2025-10-01 04:17:08.512611', 'step': 3615, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:17:08.545025', 'step': 3615, 'epoch': 1} {'type': 'loss', 'content': 0.19471551477909088, 'timestamp': '2025-10-01 04:17:08.570059', 'step': 3616, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:17:08.608943', 'step': 3616, 'epoch': 1} {'type': 'loss', 'content': 0.12613588571548462, 'timestamp': '2025-10-01 04:17:08.610912', 'step': 3617, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:17:08.641428', 'step': 3617, 'epoch': 1} {'type': 'loss', 'content': 0.174076646566391, 'timestamp': '2025-10-01 04:17:08.643661', 'step': 3618, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:08.674579', 'step': 3618, 'epoch': 1} {'type': 'loss', 'content': 0.17229856550693512, 'timestamp': '2025-10-01 04:17:08.676867', 'step': 3619, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:17:08.710632', 'step': 3619, 'epoch': 1} {'type': 'loss', 'content': 0.2146741896867752, 'timestamp': '2025-10-01 04:17:08.734679', 'step': 3620, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:17:08.766448', 'step': 3620, 'epoch': 1} {'type': 'loss', 'content': 0.1809259057044983, 'timestamp': '2025-10-01 04:17:08.768677', 'step': 3621, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:17:08.803051', 'step': 3621, 'epoch': 1} {'type': 'loss', 'content': 0.12432478368282318, 'timestamp': '2025-10-01 04:17:08.806060', 'step': 3622, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:17:08.839578', 'step': 3622, 'epoch': 1} {'type': 'loss', 'content': 0.10092790424823761, 'timestamp': '2025-10-01 04:17:08.841904', 'step': 3623, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:08.875883', 'step': 3623, 'epoch': 1} {'type': 'loss', 'content': 0.2920754551887512, 'timestamp': '2025-10-01 04:17:08.899642', 'step': 3624, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:08.932637', 'step': 3624, 'epoch': 1} {'type': 'loss', 'content': 0.21076202392578125, 'timestamp': '2025-10-01 04:17:08.934990', 'step': 3625, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:17:08.969173', 'step': 3625, 'epoch': 1} {'type': 'loss', 'content': 0.08884111046791077, 'timestamp': '2025-10-01 04:17:08.972079', 'step': 3626, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:09.007794', 'step': 3626, 'epoch': 1} {'type': 'loss', 'content': 0.14715369045734406, 'timestamp': '2025-10-01 04:17:09.011580', 'step': 3627, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:17:09.042752', 'step': 3627, 'epoch': 1} {'type': 'loss', 'content': 0.12548093497753143, 'timestamp': '2025-10-01 04:17:09.066766', 'step': 3628, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:17:09.101235', 'step': 3628, 'epoch': 1} {'type': 'loss', 'content': 0.23977778851985931, 'timestamp': '2025-10-01 04:17:09.105786', 'step': 3629, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:17:09.139348', 'step': 3629, 'epoch': 1} {'type': 'loss', 'content': 0.1780083328485489, 'timestamp': '2025-10-01 04:17:09.141407', 'step': 3630, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:17:09.172901', 'step': 3630, 'epoch': 1} {'type': 'loss', 'content': 0.19939479231834412, 'timestamp': '2025-10-01 04:17:09.175263', 'step': 3631, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:09.210682', 'step': 3631, 'epoch': 1} {'type': 'loss', 'content': 0.177105113863945, 'timestamp': '2025-10-01 04:17:09.234372', 'step': 3632, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:09.269855', 'step': 3632, 'epoch': 1} {'type': 'loss', 'content': 0.24001768231391907, 'timestamp': '2025-10-01 04:17:09.273987', 'step': 3633, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:09.306210', 'step': 3633, 'epoch': 1} {'type': 'loss', 'content': 0.1724882423877716, 'timestamp': '2025-10-01 04:17:09.308290', 'step': 3634, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:17:09.342705', 'step': 3634, 'epoch': 1} {'type': 'loss', 'content': 0.10370787978172302, 'timestamp': '2025-10-01 04:17:09.346753', 'step': 3635, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:17:09.379333', 'step': 3635, 'epoch': 1} {'type': 'loss', 'content': 0.23135755956172943, 'timestamp': '2025-10-01 04:17:09.403110', 'step': 3636, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:17:09.438229', 'step': 3636, 'epoch': 1} {'type': 'loss', 'content': 0.15350528061389923, 'timestamp': '2025-10-01 04:17:09.440436', 'step': 3637, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:17:09.471852', 'step': 3637, 'epoch': 1} {'type': 'loss', 'content': 0.13939036428928375, 'timestamp': '2025-10-01 04:17:09.474037', 'step': 3638, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:17:09.514139', 'step': 3638, 'epoch': 1} {'type': 'loss', 'content': 0.1558600217103958, 'timestamp': '2025-10-01 04:17:09.516866', 'step': 3639, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:17:09.548308', 'step': 3639, 'epoch': 1} {'type': 'loss', 'content': 0.1863521933555603, 'timestamp': '2025-10-01 04:17:09.572126', 'step': 3640, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:17:09.608687', 'step': 3640, 'epoch': 1} {'type': 'loss', 'content': 0.21208448708057404, 'timestamp': '2025-10-01 04:17:09.610653', 'step': 3641, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:17:09.644323', 'step': 3641, 'epoch': 1} {'type': 'loss', 'content': 0.19050034880638123, 'timestamp': '2025-10-01 04:17:09.646649', 'step': 3642, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:09.681087', 'step': 3642, 'epoch': 1} {'type': 'loss', 'content': 0.13829824328422546, 'timestamp': '2025-10-01 04:17:09.683278', 'step': 3643, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:17:09.720606', 'step': 3643, 'epoch': 1} {'type': 'loss', 'content': 0.10342122614383698, 'timestamp': '2025-10-01 04:17:09.743855', 'step': 3644, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:17:09.776190', 'step': 3644, 'epoch': 1} {'type': 'loss', 'content': 0.09117428958415985, 'timestamp': '2025-10-01 04:17:09.787646', 'step': 3645, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:17:09.824393', 'step': 3645, 'epoch': 1} {'type': 'loss', 'content': 0.1433274745941162, 'timestamp': '2025-10-01 04:17:09.827202', 'step': 3646, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:09.862430', 'step': 3646, 'epoch': 1} {'type': 'loss', 'content': 0.15555542707443237, 'timestamp': '2025-10-01 04:17:09.864962', 'step': 3647, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:17:09.899515', 'step': 3647, 'epoch': 1} {'type': 'loss', 'content': 0.262368381023407, 'timestamp': '2025-10-01 04:17:09.926035', 'step': 3648, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:09.958155', 'step': 3648, 'epoch': 1} {'type': 'loss', 'content': 0.18708845973014832, 'timestamp': '2025-10-01 04:17:09.959963', 'step': 3649, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:17:09.991025', 'step': 3649, 'epoch': 1} {'type': 'loss', 'content': 0.2873933017253876, 'timestamp': '2025-10-01 04:17:09.993501', 'step': 3650, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:17:10.029045', 'step': 3650, 'epoch': 1} {'type': 'loss', 'content': 0.14022484421730042, 'timestamp': '2025-10-01 04:17:10.030988', 'step': 3651, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 10441544708224}, 'timestamp': '2025-10-01 04:17:10.079396', 'step': 3651, 'epoch': 1} {'type': 'loss', 'content': 0.12078864872455597, 'timestamp': '2025-10-01 04:17:10.109900', 'step': 3652, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:10.142584', 'step': 3652, 'epoch': 1} {'type': 'loss', 'content': 0.13733579218387604, 'timestamp': '2025-10-01 04:17:10.144648', 'step': 3653, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:17:10.178571', 'step': 3653, 'epoch': 1} {'type': 'loss', 'content': 0.14856205880641937, 'timestamp': '2025-10-01 04:17:10.180913', 'step': 3654, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:17:10.215884', 'step': 3654, 'epoch': 1} {'type': 'loss', 'content': 0.1344698816537857, 'timestamp': '2025-10-01 04:17:10.217902', 'step': 3655, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:17:10.252893', 'step': 3655, 'epoch': 1} {'type': 'loss', 'content': 0.14622530341148376, 'timestamp': '2025-10-01 04:17:10.276218', 'step': 3656, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:10.312718', 'step': 3656, 'epoch': 1} {'type': 'loss', 'content': 0.1947174072265625, 'timestamp': '2025-10-01 04:17:10.314570', 'step': 3657, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:17:10.346246', 'step': 3657, 'epoch': 1} {'type': 'loss', 'content': 0.15062369406223297, 'timestamp': '2025-10-01 04:17:10.348166', 'step': 3658, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:10.379008', 'step': 3658, 'epoch': 1} {'type': 'loss', 'content': 0.14494001865386963, 'timestamp': '2025-10-01 04:17:10.381032', 'step': 3659, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:17:10.419045', 'step': 3659, 'epoch': 1} {'type': 'loss', 'content': 0.2961050570011139, 'timestamp': '2025-10-01 04:17:10.442577', 'step': 3660, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:17:10.474882', 'step': 3660, 'epoch': 1} {'type': 'loss', 'content': 0.19854363799095154, 'timestamp': '2025-10-01 04:17:10.476630', 'step': 3661, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:17:10.510385', 'step': 3661, 'epoch': 1} {'type': 'loss', 'content': 0.19526135921478271, 'timestamp': '2025-10-01 04:17:10.512657', 'step': 3662, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:17:10.547448', 'step': 3662, 'epoch': 1} {'type': 'loss', 'content': 0.26061517000198364, 'timestamp': '2025-10-01 04:17:10.549105', 'step': 3663, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:17:10.579979', 'step': 3663, 'epoch': 1} {'type': 'loss', 'content': 0.15126220881938934, 'timestamp': '2025-10-01 04:17:10.603641', 'step': 3664, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:17:10.637052', 'step': 3664, 'epoch': 1} {'type': 'loss', 'content': 0.1354764699935913, 'timestamp': '2025-10-01 04:17:10.639950', 'step': 3665, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:17:10.670535', 'step': 3665, 'epoch': 1} {'type': 'loss', 'content': 0.20521549880504608, 'timestamp': '2025-10-01 04:17:10.672309', 'step': 3666, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:17:10.706914', 'step': 3666, 'epoch': 1} {'type': 'loss', 'content': 0.19951191544532776, 'timestamp': '2025-10-01 04:17:10.709163', 'step': 3667, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:17:10.749582', 'step': 3667, 'epoch': 1} {'type': 'loss', 'content': 0.16313979029655457, 'timestamp': '2025-10-01 04:17:10.773788', 'step': 3668, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:17:10.814358', 'step': 3668, 'epoch': 1} {'type': 'loss', 'content': 0.1257132589817047, 'timestamp': '2025-10-01 04:17:10.816406', 'step': 3669, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:17:10.849999', 'step': 3669, 'epoch': 1} {'type': 'loss', 'content': 0.1540578007698059, 'timestamp': '2025-10-01 04:17:10.853507', 'step': 3670, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:17:10.887833', 'step': 3670, 'epoch': 1} {'type': 'loss', 'content': 0.15874481201171875, 'timestamp': '2025-10-01 04:17:10.890017', 'step': 3671, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:17:10.928483', 'step': 3671, 'epoch': 1} {'type': 'loss', 'content': 0.2423410415649414, 'timestamp': '2025-10-01 04:17:10.951878', 'step': 3672, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:10.987307', 'step': 3672, 'epoch': 1} {'type': 'loss', 'content': 0.10372405499219894, 'timestamp': '2025-10-01 04:17:10.989227', 'step': 3673, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:17:11.020584', 'step': 3673, 'epoch': 1} {'type': 'loss', 'content': 0.17165522277355194, 'timestamp': '2025-10-01 04:17:11.022305', 'step': 3674, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:11.053625', 'step': 3674, 'epoch': 1} {'type': 'loss', 'content': 0.2700403928756714, 'timestamp': '2025-10-01 04:17:11.055336', 'step': 3675, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:17:11.086163', 'step': 3675, 'epoch': 1} {'type': 'loss', 'content': 0.07303988188505173, 'timestamp': '2025-10-01 04:17:11.109839', 'step': 3676, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:17:11.141366', 'step': 3676, 'epoch': 1} {'type': 'loss', 'content': 0.15817488729953766, 'timestamp': '2025-10-01 04:17:11.143280', 'step': 3677, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:17:11.177129', 'step': 3677, 'epoch': 1} {'type': 'loss', 'content': 0.14759579300880432, 'timestamp': '2025-10-01 04:17:11.178849', 'step': 3678, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:11.213212', 'step': 3678, 'epoch': 1} {'type': 'loss', 'content': 0.1501365751028061, 'timestamp': '2025-10-01 04:17:11.215115', 'step': 3679, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:11.248142', 'step': 3679, 'epoch': 1} {'type': 'loss', 'content': 0.20968101918697357, 'timestamp': '2025-10-01 04:17:11.271441', 'step': 3680, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-10-01 04:17:11.305545', 'step': 3680, 'epoch': 1} {'type': 'loss', 'content': 0.14702096581459045, 'timestamp': '2025-10-01 04:17:11.307950', 'step': 3681, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:11.341858', 'step': 3681, 'epoch': 1} {'type': 'loss', 'content': 0.12201010435819626, 'timestamp': '2025-10-01 04:17:11.343859', 'step': 3682, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:11.377161', 'step': 3682, 'epoch': 1} {'type': 'loss', 'content': 0.15332946181297302, 'timestamp': '2025-10-01 04:17:11.379214', 'step': 3683, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:11.416430', 'step': 3683, 'epoch': 1} {'type': 'loss', 'content': 0.15664483606815338, 'timestamp': '2025-10-01 04:17:11.439941', 'step': 3684, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:17:11.476771', 'step': 3684, 'epoch': 1} {'type': 'loss', 'content': 0.12617145478725433, 'timestamp': '2025-10-01 04:17:11.479204', 'step': 3685, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:11.514137', 'step': 3685, 'epoch': 1} {'type': 'loss', 'content': 0.32730621099472046, 'timestamp': '2025-10-01 04:17:11.516025', 'step': 3686, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:17:11.546911', 'step': 3686, 'epoch': 1} {'type': 'loss', 'content': 0.19299615919589996, 'timestamp': '2025-10-01 04:17:11.549353', 'step': 3687, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:11.581946', 'step': 3687, 'epoch': 1} {'type': 'loss', 'content': 0.1888592392206192, 'timestamp': '2025-10-01 04:17:11.605192', 'step': 3688, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:17:11.638198', 'step': 3688, 'epoch': 1} {'type': 'loss', 'content': 0.18742993474006653, 'timestamp': '2025-10-01 04:17:11.642785', 'step': 3689, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:17:11.673134', 'step': 3689, 'epoch': 1} {'type': 'loss', 'content': 0.22014261782169342, 'timestamp': '2025-10-01 04:17:11.675742', 'step': 3690, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:17:11.705874', 'step': 3690, 'epoch': 1} {'type': 'loss', 'content': 0.1622685194015503, 'timestamp': '2025-10-01 04:17:11.707757', 'step': 3691, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:17:11.742237', 'step': 3691, 'epoch': 1} {'type': 'loss', 'content': 0.13774356245994568, 'timestamp': '2025-10-01 04:17:11.765693', 'step': 3692, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:17:11.796842', 'step': 3692, 'epoch': 1} {'type': 'loss', 'content': 0.14846178889274597, 'timestamp': '2025-10-01 04:17:11.798812', 'step': 3693, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:11.838332', 'step': 3693, 'epoch': 1} {'type': 'loss', 'content': 0.25135788321495056, 'timestamp': '2025-10-01 04:17:11.840268', 'step': 3694, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:11.881791', 'step': 3694, 'epoch': 1} {'type': 'loss', 'content': 0.2610389292240143, 'timestamp': '2025-10-01 04:17:11.883323', 'step': 3695, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:11.915314', 'step': 3695, 'epoch': 1} {'type': 'loss', 'content': 0.1303919106721878, 'timestamp': '2025-10-01 04:17:11.938942', 'step': 3696, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:17:11.970934', 'step': 3696, 'epoch': 1} {'type': 'loss', 'content': 0.0725289061665535, 'timestamp': '2025-10-01 04:17:11.972970', 'step': 3697, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:12.012904', 'step': 3697, 'epoch': 1} {'type': 'loss', 'content': 0.1849752962589264, 'timestamp': '2025-10-01 04:17:12.014436', 'step': 3698, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:12.046193', 'step': 3698, 'epoch': 1} {'type': 'loss', 'content': 0.11237412691116333, 'timestamp': '2025-10-01 04:17:12.047904', 'step': 3699, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:17:12.078595', 'step': 3699, 'epoch': 1} {'type': 'loss', 'content': 0.07770276069641113, 'timestamp': '2025-10-01 04:17:12.101968', 'step': 3700, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:17:12.134243', 'step': 3700, 'epoch': 1} {'type': 'loss', 'content': 0.2023431807756424, 'timestamp': '2025-10-01 04:17:12.136384', 'step': 3701, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:12.169025', 'step': 3701, 'epoch': 1} {'type': 'loss', 'content': 0.14541254937648773, 'timestamp': '2025-10-01 04:17:12.171482', 'step': 3702, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:17:12.201512', 'step': 3702, 'epoch': 1} {'type': 'loss', 'content': 0.17003925144672394, 'timestamp': '2025-10-01 04:17:12.204637', 'step': 3703, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:17:12.239787', 'step': 3703, 'epoch': 1} {'type': 'loss', 'content': 0.24418535828590393, 'timestamp': '2025-10-01 04:17:12.264235', 'step': 3704, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:17:12.300042', 'step': 3704, 'epoch': 1} {'type': 'loss', 'content': 0.15075692534446716, 'timestamp': '2025-10-01 04:17:12.310096', 'step': 3705, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:12.347592', 'step': 3705, 'epoch': 1} {'type': 'loss', 'content': 0.135189026594162, 'timestamp': '2025-10-01 04:17:12.354758', 'step': 3706, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:12.392527', 'step': 3706, 'epoch': 1} {'type': 'loss', 'content': 0.2265370786190033, 'timestamp': '2025-10-01 04:17:12.395368', 'step': 3707, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:12.430393', 'step': 3707, 'epoch': 1} {'type': 'loss', 'content': 0.19057263433933258, 'timestamp': '2025-10-01 04:17:12.461290', 'step': 3708, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:17:12.495532', 'step': 3708, 'epoch': 1} {'type': 'loss', 'content': 0.15262478590011597, 'timestamp': '2025-10-01 04:17:12.497257', 'step': 3709, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:17:12.532626', 'step': 3709, 'epoch': 1} {'type': 'loss', 'content': 0.12805047631263733, 'timestamp': '2025-10-01 04:17:12.534952', 'step': 3710, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:17:12.565051', 'step': 3710, 'epoch': 1} {'type': 'loss', 'content': 0.193775475025177, 'timestamp': '2025-10-01 04:17:12.566845', 'step': 3711, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:17:12.597844', 'step': 3711, 'epoch': 1} {'type': 'loss', 'content': 0.1375112533569336, 'timestamp': '2025-10-01 04:17:12.621587', 'step': 3712, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:12.655709', 'step': 3712, 'epoch': 1} {'type': 'loss', 'content': 0.23440036177635193, 'timestamp': '2025-10-01 04:17:12.659055', 'step': 3713, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:12.695097', 'step': 3713, 'epoch': 1} {'type': 'loss', 'content': 0.17451982200145721, 'timestamp': '2025-10-01 04:17:12.696911', 'step': 3714, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:17:12.732468', 'step': 3714, 'epoch': 1} {'type': 'loss', 'content': 0.19938521087169647, 'timestamp': '2025-10-01 04:17:12.734544', 'step': 3715, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:12.766673', 'step': 3715, 'epoch': 1} {'type': 'loss', 'content': 0.2640129625797272, 'timestamp': '2025-10-01 04:17:12.790200', 'step': 3716, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:12.822072', 'step': 3716, 'epoch': 1} {'type': 'loss', 'content': 0.13385948538780212, 'timestamp': '2025-10-01 04:17:12.824227', 'step': 3717, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:17:12.854680', 'step': 3717, 'epoch': 1} {'type': 'loss', 'content': 0.1698390394449234, 'timestamp': '2025-10-01 04:17:12.856666', 'step': 3718, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:17:12.891573', 'step': 3718, 'epoch': 1} {'type': 'loss', 'content': 0.1630614697933197, 'timestamp': '2025-10-01 04:17:12.893246', 'step': 3719, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:12.930035', 'step': 3719, 'epoch': 1} {'type': 'loss', 'content': 0.1348964273929596, 'timestamp': '2025-10-01 04:17:12.953441', 'step': 3720, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:17:12.985619', 'step': 3720, 'epoch': 1} {'type': 'loss', 'content': 0.17569445073604584, 'timestamp': '2025-10-01 04:17:12.987419', 'step': 3721, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:17:13.024352', 'step': 3721, 'epoch': 1} {'type': 'loss', 'content': 0.11987848579883575, 'timestamp': '2025-10-01 04:17:13.026331', 'step': 3722, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:17:13.062235', 'step': 3722, 'epoch': 1} {'type': 'loss', 'content': 0.21449072659015656, 'timestamp': '2025-10-01 04:17:13.064915', 'step': 3723, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:17:13.100163', 'step': 3723, 'epoch': 1} {'type': 'loss', 'content': 0.21545885503292084, 'timestamp': '2025-10-01 04:17:13.123756', 'step': 3724, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:17:13.158452', 'step': 3724, 'epoch': 1} {'type': 'loss', 'content': 0.13050100207328796, 'timestamp': '2025-10-01 04:17:13.160371', 'step': 3725, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:17:13.196903', 'step': 3725, 'epoch': 1} {'type': 'loss', 'content': 0.1817346215248108, 'timestamp': '2025-10-01 04:17:13.199277', 'step': 3726, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:17:13.231407', 'step': 3726, 'epoch': 1} {'type': 'loss', 'content': 0.16764317452907562, 'timestamp': '2025-10-01 04:17:13.233977', 'step': 3727, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:13.268141', 'step': 3727, 'epoch': 1} {'type': 'loss', 'content': 0.15100790560245514, 'timestamp': '2025-10-01 04:17:13.291425', 'step': 3728, 'epoch': 1} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 949202279808}], 'timestamp': '2025-10-01 04:17:22.899185', 'step': 3728, 'epoch': 1} {'type': 'pplx', 'content': 9369.409831549003, 'timestamp': '2025-10-01 04:17:22.901561', 'step': 3728, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:17:22.932849', 'step': 3728, 'epoch': 1} {'type': 'loss', 'content': 0.12471204251050949, 'timestamp': '2025-10-01 04:17:22.934543', 'step': 3729, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:17:22.968917', 'step': 3729, 'epoch': 1} {'type': 'loss', 'content': 0.18991060554981232, 'timestamp': '2025-10-01 04:17:22.970893', 'step': 3730, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:17:23.005542', 'step': 3730, 'epoch': 1} {'type': 'loss', 'content': 0.19593490660190582, 'timestamp': '2025-10-01 04:17:23.016121', 'step': 3731, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:17:23.053235', 'step': 3731, 'epoch': 1} {'type': 'loss', 'content': 0.2700883448123932, 'timestamp': '2025-10-01 04:17:23.076810', 'step': 3732, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:23.112923', 'step': 3732, 'epoch': 1} {'type': 'loss', 'content': 0.1728287935256958, 'timestamp': '2025-10-01 04:17:23.114948', 'step': 3733, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:23.146700', 'step': 3733, 'epoch': 1} {'type': 'loss', 'content': 0.20472882688045502, 'timestamp': '2025-10-01 04:17:23.148563', 'step': 3734, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:17:23.183568', 'step': 3734, 'epoch': 1} {'type': 'loss', 'content': 0.18291862308979034, 'timestamp': '2025-10-01 04:17:23.185437', 'step': 3735, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:23.220191', 'step': 3735, 'epoch': 1} {'type': 'loss', 'content': 0.21914957463741302, 'timestamp': '2025-10-01 04:17:23.244093', 'step': 3736, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:23.275741', 'step': 3736, 'epoch': 1} {'type': 'loss', 'content': 0.15957807004451752, 'timestamp': '2025-10-01 04:17:23.277667', 'step': 3737, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:17:23.309846', 'step': 3737, 'epoch': 1} {'type': 'loss', 'content': 0.07980702072381973, 'timestamp': '2025-10-01 04:17:23.311971', 'step': 3738, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:17:23.349400', 'step': 3738, 'epoch': 1} {'type': 'loss', 'content': 0.1720680147409439, 'timestamp': '2025-10-01 04:17:23.351897', 'step': 3739, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:17:23.391056', 'step': 3739, 'epoch': 1} {'type': 'loss', 'content': 0.1891024112701416, 'timestamp': '2025-10-01 04:17:23.418848', 'step': 3740, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:17:23.461609', 'step': 3740, 'epoch': 1} {'type': 'loss', 'content': 0.20654956996440887, 'timestamp': '2025-10-01 04:17:23.463436', 'step': 3741, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:17:23.498801', 'step': 3741, 'epoch': 1} {'type': 'loss', 'content': 0.1047472357749939, 'timestamp': '2025-10-01 04:17:23.502096', 'step': 3742, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:17:23.532950', 'step': 3742, 'epoch': 1} {'type': 'loss', 'content': 0.1548311859369278, 'timestamp': '2025-10-01 04:17:23.534738', 'step': 3743, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:17:23.575337', 'step': 3743, 'epoch': 1} {'type': 'loss', 'content': 0.22332701086997986, 'timestamp': '2025-10-01 04:17:23.598730', 'step': 3744, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:17:23.633126', 'step': 3744, 'epoch': 1} {'type': 'loss', 'content': 0.1535397469997406, 'timestamp': '2025-10-01 04:17:23.635133', 'step': 3745, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:23.667664', 'step': 3745, 'epoch': 1} {'type': 'loss', 'content': 0.11823530495166779, 'timestamp': '2025-10-01 04:17:23.669985', 'step': 3746, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:23.703144', 'step': 3746, 'epoch': 1} {'type': 'loss', 'content': 0.14639556407928467, 'timestamp': '2025-10-01 04:17:23.705117', 'step': 3747, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:17:23.740864', 'step': 3747, 'epoch': 1} {'type': 'loss', 'content': 0.14246894419193268, 'timestamp': '2025-10-01 04:17:23.764751', 'step': 3748, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:17:23.795664', 'step': 3748, 'epoch': 1} {'type': 'loss', 'content': 0.24524304270744324, 'timestamp': '2025-10-01 04:17:23.797769', 'step': 3749, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:23.834419', 'step': 3749, 'epoch': 1} {'type': 'loss', 'content': 0.1585301160812378, 'timestamp': '2025-10-01 04:17:23.836231', 'step': 3750, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:17:23.871237', 'step': 3750, 'epoch': 1} {'type': 'loss', 'content': 0.19985413551330566, 'timestamp': '2025-10-01 04:17:23.873889', 'step': 3751, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:23.905247', 'step': 3751, 'epoch': 1} {'type': 'loss', 'content': 0.08810971677303314, 'timestamp': '2025-10-01 04:17:23.928676', 'step': 3752, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:17:23.960423', 'step': 3752, 'epoch': 1} {'type': 'loss', 'content': 0.18312589824199677, 'timestamp': '2025-10-01 04:17:23.962408', 'step': 3753, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:17:24.000395', 'step': 3753, 'epoch': 1} {'type': 'loss', 'content': 0.12510186433792114, 'timestamp': '2025-10-01 04:17:24.002408', 'step': 3754, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:17:24.038771', 'step': 3754, 'epoch': 1} {'type': 'loss', 'content': 0.1474648267030716, 'timestamp': '2025-10-01 04:17:24.041165', 'step': 3755, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:17:24.084972', 'step': 3755, 'epoch': 1} {'type': 'loss', 'content': 0.19316840171813965, 'timestamp': '2025-10-01 04:17:24.108509', 'step': 3756, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:17:24.145309', 'step': 3756, 'epoch': 1} {'type': 'loss', 'content': 0.1598527431488037, 'timestamp': '2025-10-01 04:17:24.147304', 'step': 3757, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-10-01 04:17:24.179716', 'step': 3757, 'epoch': 1} {'type': 'loss', 'content': 0.26187002658843994, 'timestamp': '2025-10-01 04:17:24.184415', 'step': 3758, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:24.216770', 'step': 3758, 'epoch': 1} {'type': 'loss', 'content': 0.11494503170251846, 'timestamp': '2025-10-01 04:17:24.218676', 'step': 3759, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:17:24.258678', 'step': 3759, 'epoch': 1} {'type': 'loss', 'content': 0.0943482518196106, 'timestamp': '2025-10-01 04:17:24.282780', 'step': 3760, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:17:24.317992', 'step': 3760, 'epoch': 1} {'type': 'loss', 'content': 0.1271088868379593, 'timestamp': '2025-10-01 04:17:24.319927', 'step': 3761, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:17:24.354478', 'step': 3761, 'epoch': 1} {'type': 'loss', 'content': 0.12495075911283493, 'timestamp': '2025-10-01 04:17:24.357255', 'step': 3762, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:24.389541', 'step': 3762, 'epoch': 1} {'type': 'loss', 'content': 0.15906424820423126, 'timestamp': '2025-10-01 04:17:24.391640', 'step': 3763, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:17:24.424387', 'step': 3763, 'epoch': 1} {'type': 'loss', 'content': 0.13897576928138733, 'timestamp': '2025-10-01 04:17:24.448057', 'step': 3764, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:17:24.488802', 'step': 3764, 'epoch': 1} {'type': 'loss', 'content': 0.10003949701786041, 'timestamp': '2025-10-01 04:17:24.490944', 'step': 3765, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:17:24.525111', 'step': 3765, 'epoch': 1} {'type': 'loss', 'content': 0.0704546719789505, 'timestamp': '2025-10-01 04:17:24.527222', 'step': 3766, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:24.564549', 'step': 3766, 'epoch': 1} {'type': 'loss', 'content': 0.10924773663282394, 'timestamp': '2025-10-01 04:17:24.566854', 'step': 3767, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:17:24.603183', 'step': 3767, 'epoch': 1} {'type': 'loss', 'content': 0.21056200563907623, 'timestamp': '2025-10-01 04:17:24.628692', 'step': 3768, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:17:24.662785', 'step': 3768, 'epoch': 1} {'type': 'loss', 'content': 0.18537726998329163, 'timestamp': '2025-10-01 04:17:24.665009', 'step': 3769, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:17:24.696261', 'step': 3769, 'epoch': 1} {'type': 'loss', 'content': 0.11693284660577774, 'timestamp': '2025-10-01 04:17:24.699287', 'step': 3770, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:24.730454', 'step': 3770, 'epoch': 1} {'type': 'loss', 'content': 0.11790186911821365, 'timestamp': '2025-10-01 04:17:24.732498', 'step': 3771, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:24.767549', 'step': 3771, 'epoch': 1} {'type': 'loss', 'content': 0.12911520898342133, 'timestamp': '2025-10-01 04:17:24.790916', 'step': 3772, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:24.823036', 'step': 3772, 'epoch': 1} {'type': 'loss', 'content': 0.16518184542655945, 'timestamp': '2025-10-01 04:17:24.825316', 'step': 3773, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:17:24.855978', 'step': 3773, 'epoch': 1} {'type': 'loss', 'content': 0.23061981797218323, 'timestamp': '2025-10-01 04:17:24.858395', 'step': 3774, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:24.889518', 'step': 3774, 'epoch': 1} {'type': 'loss', 'content': 0.05844732001423836, 'timestamp': '2025-10-01 04:17:24.891617', 'step': 3775, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:17:24.924509', 'step': 3775, 'epoch': 1} {'type': 'loss', 'content': 0.11665268242359161, 'timestamp': '2025-10-01 04:17:24.948134', 'step': 3776, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:17:24.978860', 'step': 3776, 'epoch': 1} {'type': 'loss', 'content': 0.16289451718330383, 'timestamp': '2025-10-01 04:17:24.980767', 'step': 3777, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:25.011993', 'step': 3777, 'epoch': 1} {'type': 'loss', 'content': 0.18720698356628418, 'timestamp': '2025-10-01 04:17:25.014098', 'step': 3778, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:17:25.049006', 'step': 3778, 'epoch': 1} {'type': 'loss', 'content': 0.12893152236938477, 'timestamp': '2025-10-01 04:17:25.051180', 'step': 3779, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:17:25.081282', 'step': 3779, 'epoch': 1} {'type': 'loss', 'content': 0.13718833029270172, 'timestamp': '2025-10-01 04:17:25.104594', 'step': 3780, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:17:25.135190', 'step': 3780, 'epoch': 1} {'type': 'loss', 'content': 0.1423470675945282, 'timestamp': '2025-10-01 04:17:25.138531', 'step': 3781, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:17:25.169529', 'step': 3781, 'epoch': 1} {'type': 'loss', 'content': 0.09246741980314255, 'timestamp': '2025-10-01 04:17:25.171893', 'step': 3782, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:25.212362', 'step': 3782, 'epoch': 1} {'type': 'loss', 'content': 0.19195853173732758, 'timestamp': '2025-10-01 04:17:25.215231', 'step': 3783, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:25.250010', 'step': 3783, 'epoch': 1} {'type': 'loss', 'content': 0.11537019163370132, 'timestamp': '2025-10-01 04:17:25.273789', 'step': 3784, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:17:25.306479', 'step': 3784, 'epoch': 1} {'type': 'loss', 'content': 0.19146457314491272, 'timestamp': '2025-10-01 04:17:25.308500', 'step': 3785, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:25.346394', 'step': 3785, 'epoch': 1} {'type': 'loss', 'content': 0.09187306463718414, 'timestamp': '2025-10-01 04:17:25.348660', 'step': 3786, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:25.385884', 'step': 3786, 'epoch': 1} {'type': 'loss', 'content': 0.14564268290996552, 'timestamp': '2025-10-01 04:17:25.388090', 'step': 3787, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:17:25.424988', 'step': 3787, 'epoch': 1} {'type': 'loss', 'content': 0.16353203356266022, 'timestamp': '2025-10-01 04:17:25.450379', 'step': 3788, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:17:25.491133', 'step': 3788, 'epoch': 1} {'type': 'loss', 'content': 0.2029617577791214, 'timestamp': '2025-10-01 04:17:25.493370', 'step': 3789, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:17:25.526599', 'step': 3789, 'epoch': 1} {'type': 'loss', 'content': 0.15062637627124786, 'timestamp': '2025-10-01 04:17:25.528960', 'step': 3790, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:17:25.565566', 'step': 3790, 'epoch': 1} {'type': 'loss', 'content': 0.1547050029039383, 'timestamp': '2025-10-01 04:17:25.567843', 'step': 3791, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:17:25.604315', 'step': 3791, 'epoch': 1} {'type': 'loss', 'content': 0.15807019174098969, 'timestamp': '2025-10-01 04:17:25.627929', 'step': 3792, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:17:25.665114', 'step': 3792, 'epoch': 1} {'type': 'loss', 'content': 0.2579161524772644, 'timestamp': '2025-10-01 04:17:25.669120', 'step': 3793, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:17:25.700021', 'step': 3793, 'epoch': 1} {'type': 'loss', 'content': 0.18925103545188904, 'timestamp': '2025-10-01 04:17:25.702125', 'step': 3794, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:25.734803', 'step': 3794, 'epoch': 1} {'type': 'loss', 'content': 0.21477460861206055, 'timestamp': '2025-10-01 04:17:25.736789', 'step': 3795, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:17:25.772802', 'step': 3795, 'epoch': 1} {'type': 'loss', 'content': 0.1476103812456131, 'timestamp': '2025-10-01 04:17:25.802781', 'step': 3796, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:25.837274', 'step': 3796, 'epoch': 1} {'type': 'loss', 'content': 0.06914854794740677, 'timestamp': '2025-10-01 04:17:25.839510', 'step': 3797, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:17:25.870582', 'step': 3797, 'epoch': 1} {'type': 'loss', 'content': 0.17849169671535492, 'timestamp': '2025-10-01 04:17:25.873457', 'step': 3798, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:17:25.910711', 'step': 3798, 'epoch': 1} {'type': 'loss', 'content': 0.2925865650177002, 'timestamp': '2025-10-01 04:17:25.912991', 'step': 3799, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:17:25.954542', 'step': 3799, 'epoch': 1} {'type': 'loss', 'content': 0.0872323140501976, 'timestamp': '2025-10-01 04:17:25.978453', 'step': 3800, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:26.020383', 'step': 3800, 'epoch': 1} {'type': 'loss', 'content': 0.1158721074461937, 'timestamp': '2025-10-01 04:17:26.024304', 'step': 3801, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:26.062209', 'step': 3801, 'epoch': 1} {'type': 'loss', 'content': 0.1325828731060028, 'timestamp': '2025-10-01 04:17:26.064218', 'step': 3802, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:26.097195', 'step': 3802, 'epoch': 1} {'type': 'loss', 'content': 0.11460801213979721, 'timestamp': '2025-10-01 04:17:26.099353', 'step': 3803, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:17:26.132080', 'step': 3803, 'epoch': 1} {'type': 'loss', 'content': 0.2133929282426834, 'timestamp': '2025-10-01 04:17:26.155783', 'step': 3804, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:17:26.195707', 'step': 3804, 'epoch': 1} {'type': 'loss', 'content': 0.14946754276752472, 'timestamp': '2025-10-01 04:17:26.198114', 'step': 3805, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:17:26.236245', 'step': 3805, 'epoch': 1} {'type': 'loss', 'content': 0.13944661617279053, 'timestamp': '2025-10-01 04:17:26.240386', 'step': 3806, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:17:26.286886', 'step': 3806, 'epoch': 1} {'type': 'loss', 'content': 0.21289357542991638, 'timestamp': '2025-10-01 04:17:26.291239', 'step': 3807, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:17:26.323044', 'step': 3807, 'epoch': 1} {'type': 'loss', 'content': 0.09410703182220459, 'timestamp': '2025-10-01 04:17:26.346696', 'step': 3808, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:26.379847', 'step': 3808, 'epoch': 1} {'type': 'loss', 'content': 0.16277265548706055, 'timestamp': '2025-10-01 04:17:26.385051', 'step': 3809, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:17:26.418357', 'step': 3809, 'epoch': 1} {'type': 'loss', 'content': 0.1722906082868576, 'timestamp': '2025-10-01 04:17:26.428710', 'step': 3810, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:17:26.465541', 'step': 3810, 'epoch': 1} {'type': 'loss', 'content': 0.1346084475517273, 'timestamp': '2025-10-01 04:17:26.467884', 'step': 3811, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:26.498766', 'step': 3811, 'epoch': 1} {'type': 'loss', 'content': 0.1944870799779892, 'timestamp': '2025-10-01 04:17:26.523610', 'step': 3812, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:17:26.557514', 'step': 3812, 'epoch': 1} {'type': 'loss', 'content': 0.16161291301250458, 'timestamp': '2025-10-01 04:17:26.559602', 'step': 3813, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:26.592472', 'step': 3813, 'epoch': 1} {'type': 'loss', 'content': 0.15621623396873474, 'timestamp': '2025-10-01 04:17:26.594333', 'step': 3814, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:26.630984', 'step': 3814, 'epoch': 1} {'type': 'loss', 'content': 0.11144014447927475, 'timestamp': '2025-10-01 04:17:26.632849', 'step': 3815, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:17:26.669595', 'step': 3815, 'epoch': 1} {'type': 'loss', 'content': 0.21616245806217194, 'timestamp': '2025-10-01 04:17:26.693077', 'step': 3816, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:17:26.731092', 'step': 3816, 'epoch': 1} {'type': 'loss', 'content': 0.1473836749792099, 'timestamp': '2025-10-01 04:17:26.733940', 'step': 3817, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:17:26.771572', 'step': 3817, 'epoch': 1} {'type': 'loss', 'content': 0.10437805950641632, 'timestamp': '2025-10-01 04:17:26.773908', 'step': 3818, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:17:26.804864', 'step': 3818, 'epoch': 1} {'type': 'loss', 'content': 0.17237019538879395, 'timestamp': '2025-10-01 04:17:26.806770', 'step': 3819, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:26.840444', 'step': 3819, 'epoch': 1} {'type': 'loss', 'content': 0.14140231907367706, 'timestamp': '2025-10-01 04:17:26.863896', 'step': 3820, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:26.894804', 'step': 3820, 'epoch': 1} {'type': 'loss', 'content': 0.10832652449607849, 'timestamp': '2025-10-01 04:17:26.896684', 'step': 3821, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:26.936806', 'step': 3821, 'epoch': 1} {'type': 'loss', 'content': 0.18299207091331482, 'timestamp': '2025-10-01 04:17:26.939339', 'step': 3822, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:26.977844', 'step': 3822, 'epoch': 1} {'type': 'loss', 'content': 0.10479576140642166, 'timestamp': '2025-10-01 04:17:26.979765', 'step': 3823, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:27.016495', 'step': 3823, 'epoch': 1} {'type': 'loss', 'content': 0.1960991770029068, 'timestamp': '2025-10-01 04:17:27.039856', 'step': 3824, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:17:27.085876', 'step': 3824, 'epoch': 1} {'type': 'loss', 'content': 0.10012555122375488, 'timestamp': '2025-10-01 04:17:27.092795', 'step': 3825, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:17:27.134522', 'step': 3825, 'epoch': 1} {'type': 'loss', 'content': 0.18479518592357635, 'timestamp': '2025-10-01 04:17:27.139904', 'step': 3826, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:27.172316', 'step': 3826, 'epoch': 1} {'type': 'loss', 'content': 0.1385558396577835, 'timestamp': '2025-10-01 04:17:27.174515', 'step': 3827, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:17:27.209751', 'step': 3827, 'epoch': 1} {'type': 'loss', 'content': 0.19952407479286194, 'timestamp': '2025-10-01 04:17:27.233231', 'step': 3828, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:17:27.267304', 'step': 3828, 'epoch': 1} {'type': 'loss', 'content': 0.1486334204673767, 'timestamp': '2025-10-01 04:17:27.269205', 'step': 3829, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:17:27.303380', 'step': 3829, 'epoch': 1} {'type': 'loss', 'content': 0.1009930670261383, 'timestamp': '2025-10-01 04:17:27.305230', 'step': 3830, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:17:27.344626', 'step': 3830, 'epoch': 1} {'type': 'loss', 'content': 0.15209300816059113, 'timestamp': '2025-10-01 04:17:27.357200', 'step': 3831, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:17:27.389348', 'step': 3831, 'epoch': 1} {'type': 'loss', 'content': 0.20878784358501434, 'timestamp': '2025-10-01 04:17:27.412844', 'step': 3832, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:17:27.449626', 'step': 3832, 'epoch': 1} {'type': 'loss', 'content': 0.1894058734178543, 'timestamp': '2025-10-01 04:17:27.451539', 'step': 3833, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:17:27.488081', 'step': 3833, 'epoch': 1} {'type': 'loss', 'content': 0.2070813924074173, 'timestamp': '2025-10-01 04:17:27.490258', 'step': 3834, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:27.526045', 'step': 3834, 'epoch': 1} {'type': 'loss', 'content': 0.22363972663879395, 'timestamp': '2025-10-01 04:17:27.528051', 'step': 3835, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:27.565450', 'step': 3835, 'epoch': 1} {'type': 'loss', 'content': 0.16253109276294708, 'timestamp': '2025-10-01 04:17:27.591032', 'step': 3836, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:17:27.622286', 'step': 3836, 'epoch': 1} {'type': 'loss', 'content': 0.17701996862888336, 'timestamp': '2025-10-01 04:17:27.624188', 'step': 3837, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:27.656311', 'step': 3837, 'epoch': 1} {'type': 'loss', 'content': 0.10718247294425964, 'timestamp': '2025-10-01 04:17:27.658353', 'step': 3838, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:17:27.703861', 'step': 3838, 'epoch': 1} {'type': 'loss', 'content': 0.18828028440475464, 'timestamp': '2025-10-01 04:17:27.705881', 'step': 3839, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:17:27.735608', 'step': 3839, 'epoch': 1} {'type': 'loss', 'content': 0.14519497752189636, 'timestamp': '2025-10-01 04:17:27.759208', 'step': 3840, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:17:27.791199', 'step': 3840, 'epoch': 1} {'type': 'loss', 'content': 0.1967434585094452, 'timestamp': '2025-10-01 04:17:27.793241', 'step': 3841, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:27.826435', 'step': 3841, 'epoch': 1} {'type': 'loss', 'content': 0.19580329954624176, 'timestamp': '2025-10-01 04:17:27.828391', 'step': 3842, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:17:27.862085', 'step': 3842, 'epoch': 1} {'type': 'loss', 'content': 0.1959538608789444, 'timestamp': '2025-10-01 04:17:27.863915', 'step': 3843, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:17:27.894790', 'step': 3843, 'epoch': 1} {'type': 'loss', 'content': 0.19081659615039825, 'timestamp': '2025-10-01 04:17:27.919720', 'step': 3844, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:27.950981', 'step': 3844, 'epoch': 1} {'type': 'loss', 'content': 0.25607219338417053, 'timestamp': '2025-10-01 04:17:27.952902', 'step': 3845, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:17:27.990454', 'step': 3845, 'epoch': 1} {'type': 'loss', 'content': 0.17658817768096924, 'timestamp': '2025-10-01 04:17:27.993416', 'step': 3846, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:17:28.037207', 'step': 3846, 'epoch': 1} {'type': 'loss', 'content': 0.28494858741760254, 'timestamp': '2025-10-01 04:17:28.039350', 'step': 3847, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:28.072472', 'step': 3847, 'epoch': 1} {'type': 'loss', 'content': 0.2507191598415375, 'timestamp': '2025-10-01 04:17:28.095797', 'step': 3848, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:28.130583', 'step': 3848, 'epoch': 1} {'type': 'loss', 'content': 0.19275127351284027, 'timestamp': '2025-10-01 04:17:28.132448', 'step': 3849, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:28.173315', 'step': 3849, 'epoch': 1} {'type': 'loss', 'content': 0.15918467938899994, 'timestamp': '2025-10-01 04:17:28.175157', 'step': 3850, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:17:28.207380', 'step': 3850, 'epoch': 1} {'type': 'loss', 'content': 0.19538523256778717, 'timestamp': '2025-10-01 04:17:28.209404', 'step': 3851, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:28.253068', 'step': 3851, 'epoch': 1} {'type': 'loss', 'content': 0.18947654962539673, 'timestamp': '2025-10-01 04:17:28.276395', 'step': 3852, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:17:28.308677', 'step': 3852, 'epoch': 1} {'type': 'loss', 'content': 0.18324708938598633, 'timestamp': '2025-10-01 04:17:28.310643', 'step': 3853, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:17:28.343687', 'step': 3853, 'epoch': 1} {'type': 'loss', 'content': 0.16183577477931976, 'timestamp': '2025-10-01 04:17:28.346041', 'step': 3854, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:17:28.377986', 'step': 3854, 'epoch': 1} {'type': 'loss', 'content': 0.18069934844970703, 'timestamp': '2025-10-01 04:17:28.380029', 'step': 3855, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:28.415706', 'step': 3855, 'epoch': 1} {'type': 'loss', 'content': 0.17457935214042664, 'timestamp': '2025-10-01 04:17:28.439095', 'step': 3856, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:28.476801', 'step': 3856, 'epoch': 1} {'type': 'loss', 'content': 0.15890762209892273, 'timestamp': '2025-10-01 04:17:28.479226', 'step': 3857, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:28.510684', 'step': 3857, 'epoch': 1} {'type': 'loss', 'content': 0.17209327220916748, 'timestamp': '2025-10-01 04:17:28.512680', 'step': 3858, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:17:28.544094', 'step': 3858, 'epoch': 1} {'type': 'loss', 'content': 0.14878185093402863, 'timestamp': '2025-10-01 04:17:28.547111', 'step': 3859, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:17:28.579521', 'step': 3859, 'epoch': 1} {'type': 'loss', 'content': 0.0984286516904831, 'timestamp': '2025-10-01 04:17:28.602965', 'step': 3860, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:28.642572', 'step': 3860, 'epoch': 1} {'type': 'loss', 'content': 0.24380744993686676, 'timestamp': '2025-10-01 04:17:28.644981', 'step': 3861, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:28.681147', 'step': 3861, 'epoch': 1} {'type': 'loss', 'content': 0.16925369203090668, 'timestamp': '2025-10-01 04:17:28.683097', 'step': 3862, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:17:28.723689', 'step': 3862, 'epoch': 1} {'type': 'loss', 'content': 0.13572299480438232, 'timestamp': '2025-10-01 04:17:28.726117', 'step': 3863, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:17:28.756802', 'step': 3863, 'epoch': 1} {'type': 'loss', 'content': 0.08750215172767639, 'timestamp': '2025-10-01 04:17:28.780237', 'step': 3864, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:17:28.817816', 'step': 3864, 'epoch': 1} {'type': 'loss', 'content': 0.21717625856399536, 'timestamp': '2025-10-01 04:17:28.819799', 'step': 3865, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:17:28.850697', 'step': 3865, 'epoch': 1} {'type': 'loss', 'content': 0.24453169107437134, 'timestamp': '2025-10-01 04:17:28.852978', 'step': 3866, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:28.889318', 'step': 3866, 'epoch': 1} {'type': 'loss', 'content': 0.15296435356140137, 'timestamp': '2025-10-01 04:17:28.891323', 'step': 3867, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:17:28.927765', 'step': 3867, 'epoch': 1} {'type': 'loss', 'content': 0.11823044717311859, 'timestamp': '2025-10-01 04:17:28.951347', 'step': 3868, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:28.987182', 'step': 3868, 'epoch': 1} {'type': 'loss', 'content': 0.1939268261194229, 'timestamp': '2025-10-01 04:17:28.989761', 'step': 3869, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:17:29.031313', 'step': 3869, 'epoch': 1} {'type': 'loss', 'content': 0.18242739140987396, 'timestamp': '2025-10-01 04:17:29.043366', 'step': 3870, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:29.075551', 'step': 3870, 'epoch': 1} {'type': 'loss', 'content': 0.12996667623519897, 'timestamp': '2025-10-01 04:17:29.077859', 'step': 3871, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:17:29.110462', 'step': 3871, 'epoch': 1} {'type': 'loss', 'content': 0.2807536721229553, 'timestamp': '2025-10-01 04:17:29.134036', 'step': 3872, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:17:29.165577', 'step': 3872, 'epoch': 1} {'type': 'loss', 'content': 0.275060772895813, 'timestamp': '2025-10-01 04:17:29.168312', 'step': 3873, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:17:29.218446', 'step': 3873, 'epoch': 1} {'type': 'loss', 'content': 0.1401766687631607, 'timestamp': '2025-10-01 04:17:29.220762', 'step': 3874, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:17:29.257291', 'step': 3874, 'epoch': 1} {'type': 'loss', 'content': 0.09571100026369095, 'timestamp': '2025-10-01 04:17:29.259361', 'step': 3875, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:17:29.290907', 'step': 3875, 'epoch': 1} {'type': 'loss', 'content': 0.1288757473230362, 'timestamp': '2025-10-01 04:17:29.314347', 'step': 3876, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:17:29.352573', 'step': 3876, 'epoch': 1} {'type': 'loss', 'content': 0.12281670421361923, 'timestamp': '2025-10-01 04:17:29.354807', 'step': 3877, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:29.387892', 'step': 3877, 'epoch': 1} {'type': 'loss', 'content': 0.11305103451013565, 'timestamp': '2025-10-01 04:17:29.390130', 'step': 3878, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:17:29.428254', 'step': 3878, 'epoch': 1} {'type': 'loss', 'content': 0.2098066359758377, 'timestamp': '2025-10-01 04:17:29.430457', 'step': 3879, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:17:29.472683', 'step': 3879, 'epoch': 1} {'type': 'loss', 'content': 0.09519362449645996, 'timestamp': '2025-10-01 04:17:29.497482', 'step': 3880, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:17:29.529252', 'step': 3880, 'epoch': 1} {'type': 'loss', 'content': 0.20365749299526215, 'timestamp': '2025-10-01 04:17:29.531601', 'step': 3881, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:17:29.569964', 'step': 3881, 'epoch': 1} {'type': 'loss', 'content': 0.16740766167640686, 'timestamp': '2025-10-01 04:17:29.573910', 'step': 3882, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:29.612250', 'step': 3882, 'epoch': 1} {'type': 'loss', 'content': 0.12865984439849854, 'timestamp': '2025-10-01 04:17:29.614501', 'step': 3883, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:29.645695', 'step': 3883, 'epoch': 1} {'type': 'loss', 'content': 0.19214433431625366, 'timestamp': '2025-10-01 04:17:29.670986', 'step': 3884, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:17:29.702202', 'step': 3884, 'epoch': 1} {'type': 'loss', 'content': 0.23779529333114624, 'timestamp': '2025-10-01 04:17:29.704700', 'step': 3885, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:17:29.736463', 'step': 3885, 'epoch': 1} {'type': 'loss', 'content': 0.12295019626617432, 'timestamp': '2025-10-01 04:17:29.738584', 'step': 3886, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:17:29.770149', 'step': 3886, 'epoch': 1} {'type': 'loss', 'content': 0.13465292751789093, 'timestamp': '2025-10-01 04:17:29.772078', 'step': 3887, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:17:29.808875', 'step': 3887, 'epoch': 1} {'type': 'loss', 'content': 0.1805817186832428, 'timestamp': '2025-10-01 04:17:29.833044', 'step': 3888, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:29.869416', 'step': 3888, 'epoch': 1} {'type': 'loss', 'content': 0.2121555209159851, 'timestamp': '2025-10-01 04:17:29.871541', 'step': 3889, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:29.911131', 'step': 3889, 'epoch': 1} {'type': 'loss', 'content': 0.18457993865013123, 'timestamp': '2025-10-01 04:17:29.914848', 'step': 3890, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:17:29.951925', 'step': 3890, 'epoch': 1} {'type': 'loss', 'content': 0.18312719464302063, 'timestamp': '2025-10-01 04:17:29.953897', 'step': 3891, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:17:29.988662', 'step': 3891, 'epoch': 1} {'type': 'loss', 'content': 0.18962284922599792, 'timestamp': '2025-10-01 04:17:30.013139', 'step': 3892, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:17:30.062467', 'step': 3892, 'epoch': 1} {'type': 'loss', 'content': 0.1265074461698532, 'timestamp': '2025-10-01 04:17:30.064493', 'step': 3893, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:17:30.108881', 'step': 3893, 'epoch': 1} {'type': 'loss', 'content': 0.15919268131256104, 'timestamp': '2025-10-01 04:17:30.111359', 'step': 3894, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:17:30.145137', 'step': 3894, 'epoch': 1} {'type': 'loss', 'content': 0.1500348150730133, 'timestamp': '2025-10-01 04:17:30.147369', 'step': 3895, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:30.190271', 'step': 3895, 'epoch': 1} {'type': 'loss', 'content': 0.14878511428833008, 'timestamp': '2025-10-01 04:17:30.215480', 'step': 3896, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:30.272548', 'step': 3896, 'epoch': 1} {'type': 'loss', 'content': 0.11154928058385849, 'timestamp': '2025-10-01 04:17:30.274800', 'step': 3897, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:17:30.306556', 'step': 3897, 'epoch': 1} {'type': 'loss', 'content': 0.15229582786560059, 'timestamp': '2025-10-01 04:17:30.308467', 'step': 3898, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:17:30.342394', 'step': 3898, 'epoch': 1} {'type': 'loss', 'content': 0.22975033521652222, 'timestamp': '2025-10-01 04:17:30.344759', 'step': 3899, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:17:30.376089', 'step': 3899, 'epoch': 1} {'type': 'loss', 'content': 0.19025105237960815, 'timestamp': '2025-10-01 04:17:30.399909', 'step': 3900, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:17:30.437986', 'step': 3900, 'epoch': 1} {'type': 'loss', 'content': 0.19299164414405823, 'timestamp': '2025-10-01 04:17:30.440527', 'step': 3901, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:17:30.472423', 'step': 3901, 'epoch': 1} {'type': 'loss', 'content': 0.10874459147453308, 'timestamp': '2025-10-01 04:17:30.478482', 'step': 3902, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:17:30.531838', 'step': 3902, 'epoch': 1} {'type': 'loss', 'content': 0.15179668366909027, 'timestamp': '2025-10-01 04:17:30.533502', 'step': 3903, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:17:30.572396', 'step': 3903, 'epoch': 1} {'type': 'loss', 'content': 0.12334520369768143, 'timestamp': '2025-10-01 04:17:30.595746', 'step': 3904, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:30.633132', 'step': 3904, 'epoch': 1} {'type': 'loss', 'content': 0.18309181928634644, 'timestamp': '2025-10-01 04:17:30.635304', 'step': 3905, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:17:30.667159', 'step': 3905, 'epoch': 1} {'type': 'loss', 'content': 0.1398685723543167, 'timestamp': '2025-10-01 04:17:30.669156', 'step': 3906, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:17:30.701641', 'step': 3906, 'epoch': 1} {'type': 'loss', 'content': 0.17518483102321625, 'timestamp': '2025-10-01 04:17:30.704263', 'step': 3907, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:17:30.736544', 'step': 3907, 'epoch': 1} {'type': 'loss', 'content': 0.12035415321588516, 'timestamp': '2025-10-01 04:17:30.760146', 'step': 3908, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:17:30.792332', 'step': 3908, 'epoch': 1} {'type': 'loss', 'content': 0.20197971165180206, 'timestamp': '2025-10-01 04:17:30.794226', 'step': 3909, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:30.835566', 'step': 3909, 'epoch': 1} {'type': 'loss', 'content': 0.17750543355941772, 'timestamp': '2025-10-01 04:17:30.837573', 'step': 3910, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:30.870086', 'step': 3910, 'epoch': 1} {'type': 'loss', 'content': 0.15995965898036957, 'timestamp': '2025-10-01 04:17:30.872923', 'step': 3911, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:17:30.905300', 'step': 3911, 'epoch': 1} {'type': 'loss', 'content': 0.14123563468456268, 'timestamp': '2025-10-01 04:17:30.928746', 'step': 3912, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:30.973093', 'step': 3912, 'epoch': 1} {'type': 'loss', 'content': 0.14200125634670258, 'timestamp': '2025-10-01 04:17:30.975152', 'step': 3913, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:17:31.008121', 'step': 3913, 'epoch': 1} {'type': 'loss', 'content': 0.13562844693660736, 'timestamp': '2025-10-01 04:17:31.011433', 'step': 3914, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:31.044849', 'step': 3914, 'epoch': 1} {'type': 'loss', 'content': 0.18190927803516388, 'timestamp': '2025-10-01 04:17:31.047763', 'step': 3915, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:17:31.081690', 'step': 3915, 'epoch': 1} {'type': 'loss', 'content': 0.10490796715021133, 'timestamp': '2025-10-01 04:17:31.112534', 'step': 3916, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:17:31.146403', 'step': 3916, 'epoch': 1} {'type': 'loss', 'content': 0.23283343017101288, 'timestamp': '2025-10-01 04:17:31.148399', 'step': 3917, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:17:31.182709', 'step': 3917, 'epoch': 1} {'type': 'loss', 'content': 0.2556789219379425, 'timestamp': '2025-10-01 04:17:31.185996', 'step': 3918, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:31.226559', 'step': 3918, 'epoch': 1} {'type': 'loss', 'content': 0.2620033621788025, 'timestamp': '2025-10-01 04:17:31.228547', 'step': 3919, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:17:31.273850', 'step': 3919, 'epoch': 1} {'type': 'loss', 'content': 0.19487623870372772, 'timestamp': '2025-10-01 04:17:31.297985', 'step': 3920, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:17:31.331271', 'step': 3920, 'epoch': 1} {'type': 'loss', 'content': 0.1248226910829544, 'timestamp': '2025-10-01 04:17:31.333184', 'step': 3921, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:31.367689', 'step': 3921, 'epoch': 1} {'type': 'loss', 'content': 0.19244182109832764, 'timestamp': '2025-10-01 04:17:31.369615', 'step': 3922, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:17:31.401827', 'step': 3922, 'epoch': 1} {'type': 'loss', 'content': 0.13132897019386292, 'timestamp': '2025-10-01 04:17:31.404837', 'step': 3923, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:17:31.436476', 'step': 3923, 'epoch': 1} {'type': 'loss', 'content': 0.1306469738483429, 'timestamp': '2025-10-01 04:17:31.459840', 'step': 3924, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:31.492332', 'step': 3924, 'epoch': 1} {'type': 'loss', 'content': 0.12122681736946106, 'timestamp': '2025-10-01 04:17:31.494441', 'step': 3925, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:31.529806', 'step': 3925, 'epoch': 1} {'type': 'loss', 'content': 0.18183578550815582, 'timestamp': '2025-10-01 04:17:31.531693', 'step': 3926, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:17:31.574828', 'step': 3926, 'epoch': 1} {'type': 'loss', 'content': 0.15229912102222443, 'timestamp': '2025-10-01 04:17:31.576860', 'step': 3927, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:31.620862', 'step': 3927, 'epoch': 1} {'type': 'loss', 'content': 0.15775294601917267, 'timestamp': '2025-10-01 04:17:31.645081', 'step': 3928, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:31.677157', 'step': 3928, 'epoch': 1} {'type': 'loss', 'content': 0.1206628605723381, 'timestamp': '2025-10-01 04:17:31.679471', 'step': 3929, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:17:31.715996', 'step': 3929, 'epoch': 1} {'type': 'loss', 'content': 0.13050127029418945, 'timestamp': '2025-10-01 04:17:31.719239', 'step': 3930, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:17:31.751852', 'step': 3930, 'epoch': 1} {'type': 'loss', 'content': 0.2323775440454483, 'timestamp': '2025-10-01 04:17:31.755104', 'step': 3931, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:31.797471', 'step': 3931, 'epoch': 1} {'type': 'loss', 'content': 0.14052307605743408, 'timestamp': '2025-10-01 04:17:31.821105', 'step': 3932, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:17:31.854722', 'step': 3932, 'epoch': 1} {'type': 'loss', 'content': 0.16379395127296448, 'timestamp': '2025-10-01 04:17:31.856901', 'step': 3933, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:17:31.889682', 'step': 3933, 'epoch': 1} {'type': 'loss', 'content': 0.13014693558216095, 'timestamp': '2025-10-01 04:17:31.891482', 'step': 3934, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:17:31.924341', 'step': 3934, 'epoch': 1} {'type': 'loss', 'content': 0.2062532603740692, 'timestamp': '2025-10-01 04:17:31.926745', 'step': 3935, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:17:31.959164', 'step': 3935, 'epoch': 1} {'type': 'loss', 'content': 0.09562492370605469, 'timestamp': '2025-10-01 04:17:31.983148', 'step': 3936, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:32.025922', 'step': 3936, 'epoch': 1} {'type': 'loss', 'content': 0.15404660999774933, 'timestamp': '2025-10-01 04:17:32.028170', 'step': 3937, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:17:32.069688', 'step': 3937, 'epoch': 1} {'type': 'loss', 'content': 0.16578000783920288, 'timestamp': '2025-10-01 04:17:32.071530', 'step': 3938, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:17:32.106432', 'step': 3938, 'epoch': 1} {'type': 'loss', 'content': 0.19381512701511383, 'timestamp': '2025-10-01 04:17:32.108571', 'step': 3939, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:17:32.147338', 'step': 3939, 'epoch': 1} {'type': 'loss', 'content': 0.18388444185256958, 'timestamp': '2025-10-01 04:17:32.174157', 'step': 3940, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:17:32.209398', 'step': 3940, 'epoch': 1} {'type': 'loss', 'content': 0.21896032989025116, 'timestamp': '2025-10-01 04:17:32.211414', 'step': 3941, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:32.242749', 'step': 3941, 'epoch': 1} {'type': 'loss', 'content': 0.21193818747997284, 'timestamp': '2025-10-01 04:17:32.244944', 'step': 3942, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:17:32.276710', 'step': 3942, 'epoch': 1} {'type': 'loss', 'content': 0.20024727284908295, 'timestamp': '2025-10-01 04:17:32.280361', 'step': 3943, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:32.317707', 'step': 3943, 'epoch': 1} {'type': 'loss', 'content': 0.20646020770072937, 'timestamp': '2025-10-01 04:17:32.341102', 'step': 3944, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:17:32.381504', 'step': 3944, 'epoch': 1} {'type': 'loss', 'content': 0.1847938597202301, 'timestamp': '2025-10-01 04:17:32.384751', 'step': 3945, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:17:32.421601', 'step': 3945, 'epoch': 1} {'type': 'loss', 'content': 0.19300886988639832, 'timestamp': '2025-10-01 04:17:32.428154', 'step': 3946, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:17:32.463540', 'step': 3946, 'epoch': 1} {'type': 'loss', 'content': 0.1700611561536789, 'timestamp': '2025-10-01 04:17:32.466451', 'step': 3947, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:32.499330', 'step': 3947, 'epoch': 1} {'type': 'loss', 'content': 0.22981572151184082, 'timestamp': '2025-10-01 04:17:32.522745', 'step': 3948, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:17:32.558407', 'step': 3948, 'epoch': 1} {'type': 'loss', 'content': 0.14096106588840485, 'timestamp': '2025-10-01 04:17:32.560318', 'step': 3949, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:32.605193', 'step': 3949, 'epoch': 1} {'type': 'loss', 'content': 0.16238385438919067, 'timestamp': '2025-10-01 04:17:32.607292', 'step': 3950, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:32.649484', 'step': 3950, 'epoch': 1} {'type': 'loss', 'content': 0.3225996792316437, 'timestamp': '2025-10-01 04:17:32.651637', 'step': 3951, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:17:32.688947', 'step': 3951, 'epoch': 1} {'type': 'loss', 'content': 0.1513470709323883, 'timestamp': '2025-10-01 04:17:32.712350', 'step': 3952, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:32.747851', 'step': 3952, 'epoch': 1} {'type': 'loss', 'content': 0.22496502101421356, 'timestamp': '2025-10-01 04:17:32.750634', 'step': 3953, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:17:32.781877', 'step': 3953, 'epoch': 1} {'type': 'loss', 'content': 0.13261328637599945, 'timestamp': '2025-10-01 04:17:32.784425', 'step': 3954, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:17:32.818185', 'step': 3954, 'epoch': 1} {'type': 'loss', 'content': 0.10909083485603333, 'timestamp': '2025-10-01 04:17:32.820878', 'step': 3955, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:32.853062', 'step': 3955, 'epoch': 1} {'type': 'loss', 'content': 0.09460848569869995, 'timestamp': '2025-10-01 04:17:32.876972', 'step': 3956, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:17:32.911474', 'step': 3956, 'epoch': 1} {'type': 'loss', 'content': 0.11830439418554306, 'timestamp': '2025-10-01 04:17:32.914189', 'step': 3957, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:17:32.952085', 'step': 3957, 'epoch': 1} {'type': 'loss', 'content': 0.18407811224460602, 'timestamp': '2025-10-01 04:17:32.954543', 'step': 3958, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:32.991249', 'step': 3958, 'epoch': 1} {'type': 'loss', 'content': 0.18877170979976654, 'timestamp': '2025-10-01 04:17:32.994578', 'step': 3959, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:17:33.028294', 'step': 3959, 'epoch': 1} {'type': 'loss', 'content': 0.1004195287823677, 'timestamp': '2025-10-01 04:17:33.052157', 'step': 3960, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:17:33.090057', 'step': 3960, 'epoch': 1} {'type': 'loss', 'content': 0.21910980343818665, 'timestamp': '2025-10-01 04:17:33.093477', 'step': 3961, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:33.135078', 'step': 3961, 'epoch': 1} {'type': 'loss', 'content': 0.16859325766563416, 'timestamp': '2025-10-01 04:17:33.137062', 'step': 3962, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:17:33.182792', 'step': 3962, 'epoch': 1} {'type': 'loss', 'content': 0.18158884346485138, 'timestamp': '2025-10-01 04:17:33.185172', 'step': 3963, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:17:33.224574', 'step': 3963, 'epoch': 1} {'type': 'loss', 'content': 0.1390639841556549, 'timestamp': '2025-10-01 04:17:33.248122', 'step': 3964, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:17:33.294507', 'step': 3964, 'epoch': 1} {'type': 'loss', 'content': 0.14540481567382812, 'timestamp': '2025-10-01 04:17:33.297844', 'step': 3965, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:17:33.332449', 'step': 3965, 'epoch': 1} {'type': 'loss', 'content': 0.2259981632232666, 'timestamp': '2025-10-01 04:17:33.334571', 'step': 3966, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:33.377214', 'step': 3966, 'epoch': 1} {'type': 'loss', 'content': 0.09662048518657684, 'timestamp': '2025-10-01 04:17:33.379845', 'step': 3967, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:17:33.423090', 'step': 3967, 'epoch': 1} {'type': 'loss', 'content': 0.150464728474617, 'timestamp': '2025-10-01 04:17:33.446768', 'step': 3968, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:33.482275', 'step': 3968, 'epoch': 1} {'type': 'loss', 'content': 0.27723580598831177, 'timestamp': '2025-10-01 04:17:33.484666', 'step': 3969, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:17:33.529386', 'step': 3969, 'epoch': 1} {'type': 'loss', 'content': 0.19932588934898376, 'timestamp': '2025-10-01 04:17:33.531277', 'step': 3970, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:17:33.565161', 'step': 3970, 'epoch': 1} {'type': 'loss', 'content': 0.12212017923593521, 'timestamp': '2025-10-01 04:17:33.567911', 'step': 3971, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:17:33.601296', 'step': 3971, 'epoch': 1} {'type': 'loss', 'content': 0.16986967623233795, 'timestamp': '2025-10-01 04:17:33.625169', 'step': 3972, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:17:33.656320', 'step': 3972, 'epoch': 1} {'type': 'loss', 'content': 0.20331574976444244, 'timestamp': '2025-10-01 04:17:33.658496', 'step': 3973, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:33.692741', 'step': 3973, 'epoch': 1} {'type': 'loss', 'content': 0.15932434797286987, 'timestamp': '2025-10-01 04:17:33.694927', 'step': 3974, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:17:33.743293', 'step': 3974, 'epoch': 1} {'type': 'loss', 'content': 0.21499203145503998, 'timestamp': '2025-10-01 04:17:33.745619', 'step': 3975, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:17:33.778032', 'step': 3975, 'epoch': 1} {'type': 'loss', 'content': 0.14496786892414093, 'timestamp': '2025-10-01 04:17:33.801626', 'step': 3976, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:17:33.834205', 'step': 3976, 'epoch': 1} {'type': 'loss', 'content': 0.16631536185741425, 'timestamp': '2025-10-01 04:17:33.836580', 'step': 3977, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:17:33.884746', 'step': 3977, 'epoch': 1} {'type': 'loss', 'content': 0.151324063539505, 'timestamp': '2025-10-01 04:17:33.886889', 'step': 3978, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:17:33.923183', 'step': 3978, 'epoch': 1} {'type': 'loss', 'content': 0.18001030385494232, 'timestamp': '2025-10-01 04:17:33.926701', 'step': 3979, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:17:33.973099', 'step': 3979, 'epoch': 1} {'type': 'loss', 'content': 0.19520653784275055, 'timestamp': '2025-10-01 04:17:33.996735', 'step': 3980, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:17:34.028611', 'step': 3980, 'epoch': 1} {'type': 'loss', 'content': 0.10517687350511551, 'timestamp': '2025-10-01 04:17:34.031299', 'step': 3981, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-10-01 04:17:34.063685', 'step': 3981, 'epoch': 1} {'type': 'loss', 'content': 0.1565532684326172, 'timestamp': '2025-10-01 04:17:34.068341', 'step': 3982, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:34.100502', 'step': 3982, 'epoch': 1} {'type': 'loss', 'content': 0.13090912997722626, 'timestamp': '2025-10-01 04:17:34.102959', 'step': 3983, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:34.134990', 'step': 3983, 'epoch': 1} {'type': 'loss', 'content': 0.0923270434141159, 'timestamp': '2025-10-01 04:17:34.158770', 'step': 3984, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:34.200019', 'step': 3984, 'epoch': 1} {'type': 'loss', 'content': 0.13158702850341797, 'timestamp': '2025-10-01 04:17:34.202169', 'step': 3985, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:17:34.235304', 'step': 3985, 'epoch': 1} {'type': 'loss', 'content': 0.1335875689983368, 'timestamp': '2025-10-01 04:17:34.238004', 'step': 3986, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:34.280774', 'step': 3986, 'epoch': 1} {'type': 'loss', 'content': 0.11627128720283508, 'timestamp': '2025-10-01 04:17:34.285899', 'step': 3987, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:17:34.318362', 'step': 3987, 'epoch': 1} {'type': 'loss', 'content': 0.09126879274845123, 'timestamp': '2025-10-01 04:17:34.342102', 'step': 3988, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:34.393118', 'step': 3988, 'epoch': 1} {'type': 'loss', 'content': 0.17020024359226227, 'timestamp': '2025-10-01 04:17:34.394963', 'step': 3989, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:17:34.428814', 'step': 3989, 'epoch': 1} {'type': 'loss', 'content': 0.1366594284772873, 'timestamp': '2025-10-01 04:17:34.433184', 'step': 3990, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:17:34.479691', 'step': 3990, 'epoch': 1} {'type': 'loss', 'content': 0.18276993930339813, 'timestamp': '2025-10-01 04:17:34.485293', 'step': 3991, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:17:34.519265', 'step': 3991, 'epoch': 1} {'type': 'loss', 'content': 0.12697932124137878, 'timestamp': '2025-10-01 04:17:34.543079', 'step': 3992, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:17:34.576197', 'step': 3992, 'epoch': 1} {'type': 'loss', 'content': 0.09667544811964035, 'timestamp': '2025-10-01 04:17:34.579190', 'step': 3993, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:17:34.614028', 'step': 3993, 'epoch': 1} {'type': 'loss', 'content': 0.14434027671813965, 'timestamp': '2025-10-01 04:17:34.616382', 'step': 3994, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:17:34.658486', 'step': 3994, 'epoch': 1} {'type': 'loss', 'content': 0.13511605560779572, 'timestamp': '2025-10-01 04:17:34.660561', 'step': 3995, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:34.694183', 'step': 3995, 'epoch': 1} {'type': 'loss', 'content': 0.19625261425971985, 'timestamp': '2025-10-01 04:17:34.717542', 'step': 3996, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:17:34.760090', 'step': 3996, 'epoch': 1} {'type': 'loss', 'content': 0.14050936698913574, 'timestamp': '2025-10-01 04:17:34.762054', 'step': 3997, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:17:34.795366', 'step': 3997, 'epoch': 1} {'type': 'loss', 'content': 0.23591303825378418, 'timestamp': '2025-10-01 04:17:34.797267', 'step': 3998, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:34.832100', 'step': 3998, 'epoch': 1} {'type': 'loss', 'content': 0.1412571519613266, 'timestamp': '2025-10-01 04:17:34.834090', 'step': 3999, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:17:34.868528', 'step': 3999, 'epoch': 1} {'type': 'loss', 'content': 0.22385630011558533, 'timestamp': '2025-10-01 04:17:34.895028', 'step': 4000, 'epoch': 1} {'type': 'info', 'content': 'Checkpoint saved at step 4000', 'timestamp': '2025-10-01 04:17:40.164433', 'step': 4000, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:17:40.205231', 'step': 4000, 'epoch': 1} {'type': 'loss', 'content': 0.1534299999475479, 'timestamp': '2025-10-01 04:17:40.207836', 'step': 4001, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:17:40.239658', 'step': 4001, 'epoch': 1} {'type': 'loss', 'content': 0.11536186933517456, 'timestamp': '2025-10-01 04:17:40.242350', 'step': 4002, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:40.278110', 'step': 4002, 'epoch': 1} {'type': 'loss', 'content': 0.14482374489307404, 'timestamp': '2025-10-01 04:17:40.280084', 'step': 4003, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:17:40.315097', 'step': 4003, 'epoch': 1} {'type': 'loss', 'content': 0.19921904802322388, 'timestamp': '2025-10-01 04:17:40.338564', 'step': 4004, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:17:40.369798', 'step': 4004, 'epoch': 1} {'type': 'loss', 'content': 0.2094232589006424, 'timestamp': '2025-10-01 04:17:40.371867', 'step': 4005, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:17:40.411031', 'step': 4005, 'epoch': 1} {'type': 'loss', 'content': 0.1973637342453003, 'timestamp': '2025-10-01 04:17:40.412985', 'step': 4006, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:17:40.445530', 'step': 4006, 'epoch': 1} {'type': 'loss', 'content': 0.1679902821779251, 'timestamp': '2025-10-01 04:17:40.447617', 'step': 4007, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:40.483527', 'step': 4007, 'epoch': 1} {'type': 'loss', 'content': 0.1378500610589981, 'timestamp': '2025-10-01 04:17:40.507215', 'step': 4008, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:17:40.539746', 'step': 4008, 'epoch': 1} {'type': 'loss', 'content': 0.22626271843910217, 'timestamp': '2025-10-01 04:17:40.541636', 'step': 4009, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:17:40.585452', 'step': 4009, 'epoch': 1} {'type': 'loss', 'content': 0.11921834200620651, 'timestamp': '2025-10-01 04:17:40.587301', 'step': 4010, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:17:40.625209', 'step': 4010, 'epoch': 1} {'type': 'loss', 'content': 0.2199573963880539, 'timestamp': '2025-10-01 04:17:40.627189', 'step': 4011, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:17:40.664478', 'step': 4011, 'epoch': 1} {'type': 'loss', 'content': 0.09963583946228027, 'timestamp': '2025-10-01 04:17:40.688498', 'step': 4012, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:17:40.725403', 'step': 4012, 'epoch': 1} {'type': 'loss', 'content': 0.25775715708732605, 'timestamp': '2025-10-01 04:17:40.727602', 'step': 4013, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:17:40.766052', 'step': 4013, 'epoch': 1} {'type': 'loss', 'content': 0.12765078246593475, 'timestamp': '2025-10-01 04:17:40.768047', 'step': 4014, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:17:40.810089', 'step': 4014, 'epoch': 1} {'type': 'loss', 'content': 0.14296118915081024, 'timestamp': '2025-10-01 04:17:40.812430', 'step': 4015, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:17:40.844364', 'step': 4015, 'epoch': 1} {'type': 'loss', 'content': 0.2007817029953003, 'timestamp': '2025-10-01 04:17:40.868051', 'step': 4016, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:40.900591', 'step': 4016, 'epoch': 1} {'type': 'loss', 'content': 0.09475672990083694, 'timestamp': '2025-10-01 04:17:40.902414', 'step': 4017, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:17:40.933992', 'step': 4017, 'epoch': 1} {'type': 'loss', 'content': 0.1518162041902542, 'timestamp': '2025-10-01 04:17:40.936056', 'step': 4018, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:40.973361', 'step': 4018, 'epoch': 1} {'type': 'loss', 'content': 0.16841614246368408, 'timestamp': '2025-10-01 04:17:40.975518', 'step': 4019, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:41.011530', 'step': 4019, 'epoch': 1} {'type': 'loss', 'content': 0.10422523319721222, 'timestamp': '2025-10-01 04:17:41.035345', 'step': 4020, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:17:41.073527', 'step': 4020, 'epoch': 1} {'type': 'loss', 'content': 0.13597829639911652, 'timestamp': '2025-10-01 04:17:41.075342', 'step': 4021, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:17:41.110574', 'step': 4021, 'epoch': 1} {'type': 'loss', 'content': 0.3182932138442993, 'timestamp': '2025-10-01 04:17:41.112672', 'step': 4022, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:41.144088', 'step': 4022, 'epoch': 1} {'type': 'loss', 'content': 0.12952181696891785, 'timestamp': '2025-10-01 04:17:41.146074', 'step': 4023, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:17:41.191465', 'step': 4023, 'epoch': 1} {'type': 'loss', 'content': 0.19475525617599487, 'timestamp': '2025-10-01 04:17:41.214877', 'step': 4024, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:41.246797', 'step': 4024, 'epoch': 1} {'type': 'loss', 'content': 0.11550065875053406, 'timestamp': '2025-10-01 04:17:41.248896', 'step': 4025, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:17:41.281130', 'step': 4025, 'epoch': 1} {'type': 'loss', 'content': 0.13298308849334717, 'timestamp': '2025-10-01 04:17:41.283011', 'step': 4026, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:41.317079', 'step': 4026, 'epoch': 1} {'type': 'loss', 'content': 0.16809378564357758, 'timestamp': '2025-10-01 04:17:41.319644', 'step': 4027, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:17:41.351104', 'step': 4027, 'epoch': 1} {'type': 'loss', 'content': 0.17142851650714874, 'timestamp': '2025-10-01 04:17:41.374464', 'step': 4028, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:41.408708', 'step': 4028, 'epoch': 1} {'type': 'loss', 'content': 0.19745254516601562, 'timestamp': '2025-10-01 04:17:41.410656', 'step': 4029, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:17:41.442807', 'step': 4029, 'epoch': 1} {'type': 'loss', 'content': 0.1592816412448883, 'timestamp': '2025-10-01 04:17:41.444864', 'step': 4030, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:17:41.488698', 'step': 4030, 'epoch': 1} {'type': 'loss', 'content': 0.1781175285577774, 'timestamp': '2025-10-01 04:17:41.490850', 'step': 4031, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:17:41.532591', 'step': 4031, 'epoch': 1} {'type': 'loss', 'content': 0.1699202060699463, 'timestamp': '2025-10-01 04:17:41.556022', 'step': 4032, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:17:41.586870', 'step': 4032, 'epoch': 1} {'type': 'loss', 'content': 0.1753533035516739, 'timestamp': '2025-10-01 04:17:41.589075', 'step': 4033, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:17:41.619539', 'step': 4033, 'epoch': 1} {'type': 'loss', 'content': 0.17999427020549774, 'timestamp': '2025-10-01 04:17:41.622058', 'step': 4034, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:41.659692', 'step': 4034, 'epoch': 1} {'type': 'loss', 'content': 0.1795928031206131, 'timestamp': '2025-10-01 04:17:41.661530', 'step': 4035, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:17:41.696237', 'step': 4035, 'epoch': 1} {'type': 'loss', 'content': 0.24035955965518951, 'timestamp': '2025-10-01 04:17:41.720403', 'step': 4036, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:41.757312', 'step': 4036, 'epoch': 1} {'type': 'loss', 'content': 0.17193357646465302, 'timestamp': '2025-10-01 04:17:41.759238', 'step': 4037, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:41.799515', 'step': 4037, 'epoch': 1} {'type': 'loss', 'content': 0.1725585162639618, 'timestamp': '2025-10-01 04:17:41.801508', 'step': 4038, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:41.841100', 'step': 4038, 'epoch': 1} {'type': 'loss', 'content': 0.16133536398410797, 'timestamp': '2025-10-01 04:17:41.844958', 'step': 4039, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:17:41.885248', 'step': 4039, 'epoch': 1} {'type': 'loss', 'content': 0.09422235190868378, 'timestamp': '2025-10-01 04:17:41.908978', 'step': 4040, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:41.944267', 'step': 4040, 'epoch': 1} {'type': 'loss', 'content': 0.16506518423557281, 'timestamp': '2025-10-01 04:17:41.946416', 'step': 4041, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:41.977975', 'step': 4041, 'epoch': 1} {'type': 'loss', 'content': 0.1264755129814148, 'timestamp': '2025-10-01 04:17:41.980672', 'step': 4042, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:42.012203', 'step': 4042, 'epoch': 1} {'type': 'loss', 'content': 0.20584183931350708, 'timestamp': '2025-10-01 04:17:42.014761', 'step': 4043, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:17:42.046639', 'step': 4043, 'epoch': 1} {'type': 'loss', 'content': 0.15859436988830566, 'timestamp': '2025-10-01 04:17:42.072571', 'step': 4044, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:17:42.110040', 'step': 4044, 'epoch': 1} {'type': 'loss', 'content': 0.14878462255001068, 'timestamp': '2025-10-01 04:17:42.112345', 'step': 4045, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:42.151416', 'step': 4045, 'epoch': 1} {'type': 'loss', 'content': 0.11788704991340637, 'timestamp': '2025-10-01 04:17:42.163321', 'step': 4046, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:17:42.195593', 'step': 4046, 'epoch': 1} {'type': 'loss', 'content': 0.13032124936580658, 'timestamp': '2025-10-01 04:17:42.197771', 'step': 4047, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:17:42.232859', 'step': 4047, 'epoch': 1} {'type': 'loss', 'content': 0.22800832986831665, 'timestamp': '2025-10-01 04:17:42.256940', 'step': 4048, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:42.290689', 'step': 4048, 'epoch': 1} {'type': 'loss', 'content': 0.12845337390899658, 'timestamp': '2025-10-01 04:17:42.292408', 'step': 4049, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:42.326565', 'step': 4049, 'epoch': 1} {'type': 'loss', 'content': 0.2038358747959137, 'timestamp': '2025-10-01 04:17:42.328588', 'step': 4050, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:42.361571', 'step': 4050, 'epoch': 1} {'type': 'loss', 'content': 0.15143372118473053, 'timestamp': '2025-10-01 04:17:42.363802', 'step': 4051, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:17:42.398968', 'step': 4051, 'epoch': 1} {'type': 'loss', 'content': 0.17669585347175598, 'timestamp': '2025-10-01 04:17:42.422681', 'step': 4052, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:17:42.457985', 'step': 4052, 'epoch': 1} {'type': 'loss', 'content': 0.0733502134680748, 'timestamp': '2025-10-01 04:17:42.460232', 'step': 4053, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:42.501432', 'step': 4053, 'epoch': 1} {'type': 'loss', 'content': 0.23731935024261475, 'timestamp': '2025-10-01 04:17:42.503748', 'step': 4054, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:17:42.536961', 'step': 4054, 'epoch': 1} {'type': 'loss', 'content': 0.15200971066951752, 'timestamp': '2025-10-01 04:17:42.539060', 'step': 4055, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:42.578009', 'step': 4055, 'epoch': 1} {'type': 'loss', 'content': 0.16509664058685303, 'timestamp': '2025-10-01 04:17:42.603729', 'step': 4056, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:42.646387', 'step': 4056, 'epoch': 1} {'type': 'loss', 'content': 0.20988164842128754, 'timestamp': '2025-10-01 04:17:42.650464', 'step': 4057, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:42.695867', 'step': 4057, 'epoch': 1} {'type': 'loss', 'content': 0.20545649528503418, 'timestamp': '2025-10-01 04:17:42.698328', 'step': 4058, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:17:42.741120', 'step': 4058, 'epoch': 1} {'type': 'loss', 'content': 0.1910140961408615, 'timestamp': '2025-10-01 04:17:42.743764', 'step': 4059, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:42.775246', 'step': 4059, 'epoch': 1} {'type': 'loss', 'content': 0.184930682182312, 'timestamp': '2025-10-01 04:17:42.804039', 'step': 4060, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:17:42.839165', 'step': 4060, 'epoch': 1} {'type': 'loss', 'content': 0.23159009218215942, 'timestamp': '2025-10-01 04:17:42.841323', 'step': 4061, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:17:42.875436', 'step': 4061, 'epoch': 1} {'type': 'loss', 'content': 0.16731642186641693, 'timestamp': '2025-10-01 04:17:42.877233', 'step': 4062, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:42.914393', 'step': 4062, 'epoch': 1} {'type': 'loss', 'content': 0.1866549849510193, 'timestamp': '2025-10-01 04:17:42.916339', 'step': 4063, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:17:42.949210', 'step': 4063, 'epoch': 1} {'type': 'loss', 'content': 0.22951602935791016, 'timestamp': '2025-10-01 04:17:42.982450', 'step': 4064, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:43.019684', 'step': 4064, 'epoch': 1} {'type': 'loss', 'content': 0.14800624549388885, 'timestamp': '2025-10-01 04:17:43.024786', 'step': 4065, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:43.056553', 'step': 4065, 'epoch': 1} {'type': 'loss', 'content': 0.16430731117725372, 'timestamp': '2025-10-01 04:17:43.060727', 'step': 4066, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:17:43.092341', 'step': 4066, 'epoch': 1} {'type': 'loss', 'content': 0.17209328711032867, 'timestamp': '2025-10-01 04:17:43.094888', 'step': 4067, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:43.132847', 'step': 4067, 'epoch': 1} {'type': 'loss', 'content': 0.10911546647548676, 'timestamp': '2025-10-01 04:17:43.156221', 'step': 4068, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:17:43.192559', 'step': 4068, 'epoch': 1} {'type': 'loss', 'content': 0.18228064477443695, 'timestamp': '2025-10-01 04:17:43.195997', 'step': 4069, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:17:43.229802', 'step': 4069, 'epoch': 1} {'type': 'loss', 'content': 0.1323745846748352, 'timestamp': '2025-10-01 04:17:43.232689', 'step': 4070, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:43.268842', 'step': 4070, 'epoch': 1} {'type': 'loss', 'content': 0.11331220716238022, 'timestamp': '2025-10-01 04:17:43.270931', 'step': 4071, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:43.301527', 'step': 4071, 'epoch': 1} {'type': 'loss', 'content': 0.14140930771827698, 'timestamp': '2025-10-01 04:17:43.331538', 'step': 4072, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:43.362517', 'step': 4072, 'epoch': 1} {'type': 'loss', 'content': 0.21954785287380219, 'timestamp': '2025-10-01 04:17:43.373281', 'step': 4073, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:17:43.415028', 'step': 4073, 'epoch': 1} {'type': 'loss', 'content': 0.07718706130981445, 'timestamp': '2025-10-01 04:17:43.417278', 'step': 4074, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:17:43.448131', 'step': 4074, 'epoch': 1} {'type': 'loss', 'content': 0.17788058519363403, 'timestamp': '2025-10-01 04:17:43.450918', 'step': 4075, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:17:43.484589', 'step': 4075, 'epoch': 1} {'type': 'loss', 'content': 0.2037688046693802, 'timestamp': '2025-10-01 04:17:43.507936', 'step': 4076, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:17:43.545485', 'step': 4076, 'epoch': 1} {'type': 'loss', 'content': 0.17346787452697754, 'timestamp': '2025-10-01 04:17:43.547640', 'step': 4077, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:43.580217', 'step': 4077, 'epoch': 1} {'type': 'loss', 'content': 0.14547352492809296, 'timestamp': '2025-10-01 04:17:43.582221', 'step': 4078, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:43.619686', 'step': 4078, 'epoch': 1} {'type': 'loss', 'content': 0.11217470467090607, 'timestamp': '2025-10-01 04:17:43.621739', 'step': 4079, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:43.657963', 'step': 4079, 'epoch': 1} {'type': 'loss', 'content': 0.2720315158367157, 'timestamp': '2025-10-01 04:17:43.681366', 'step': 4080, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:43.712157', 'step': 4080, 'epoch': 1} {'type': 'loss', 'content': 0.16141900420188904, 'timestamp': '2025-10-01 04:17:43.714075', 'step': 4081, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:43.745473', 'step': 4081, 'epoch': 1} {'type': 'loss', 'content': 0.20017753541469574, 'timestamp': '2025-10-01 04:17:43.747390', 'step': 4082, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:43.778875', 'step': 4082, 'epoch': 1} {'type': 'loss', 'content': 0.11320901662111282, 'timestamp': '2025-10-01 04:17:43.780726', 'step': 4083, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:17:43.822701', 'step': 4083, 'epoch': 1} {'type': 'loss', 'content': 0.17964230477809906, 'timestamp': '2025-10-01 04:17:43.846278', 'step': 4084, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:17:43.881993', 'step': 4084, 'epoch': 1} {'type': 'loss', 'content': 0.1855928599834442, 'timestamp': '2025-10-01 04:17:43.883945', 'step': 4085, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:43.916908', 'step': 4085, 'epoch': 1} {'type': 'loss', 'content': 0.14363723993301392, 'timestamp': '2025-10-01 04:17:43.918897', 'step': 4086, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:17:43.949408', 'step': 4086, 'epoch': 1} {'type': 'loss', 'content': 0.16114506125450134, 'timestamp': '2025-10-01 04:17:43.956634', 'step': 4087, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:17:43.988454', 'step': 4087, 'epoch': 1} {'type': 'loss', 'content': 0.18478722870349884, 'timestamp': '2025-10-01 04:17:44.011974', 'step': 4088, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:44.044969', 'step': 4088, 'epoch': 1} {'type': 'loss', 'content': 0.09337914735078812, 'timestamp': '2025-10-01 04:17:44.046983', 'step': 4089, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:17:44.086020', 'step': 4089, 'epoch': 1} {'type': 'loss', 'content': 0.06118181347846985, 'timestamp': '2025-10-01 04:17:44.087998', 'step': 4090, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:17:44.118391', 'step': 4090, 'epoch': 1} {'type': 'loss', 'content': 0.1416945606470108, 'timestamp': '2025-10-01 04:17:44.124650', 'step': 4091, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:17:44.155227', 'step': 4091, 'epoch': 1} {'type': 'loss', 'content': 0.12274537235498428, 'timestamp': '2025-10-01 04:17:44.184134', 'step': 4092, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:17:44.216444', 'step': 4092, 'epoch': 1} {'type': 'loss', 'content': 0.17804458737373352, 'timestamp': '2025-10-01 04:17:44.218879', 'step': 4093, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:44.251811', 'step': 4093, 'epoch': 1} {'type': 'loss', 'content': 0.10476673394441605, 'timestamp': '2025-10-01 04:17:44.253658', 'step': 4094, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:17:44.285751', 'step': 4094, 'epoch': 1} {'type': 'loss', 'content': 0.1527271270751953, 'timestamp': '2025-10-01 04:17:44.288042', 'step': 4095, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:44.320311', 'step': 4095, 'epoch': 1} {'type': 'loss', 'content': 0.18748044967651367, 'timestamp': '2025-10-01 04:17:44.343741', 'step': 4096, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:44.375765', 'step': 4096, 'epoch': 1} {'type': 'loss', 'content': 0.12328171730041504, 'timestamp': '2025-10-01 04:17:44.378088', 'step': 4097, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:17:44.411619', 'step': 4097, 'epoch': 1} {'type': 'loss', 'content': 0.1560860276222229, 'timestamp': '2025-10-01 04:17:44.414381', 'step': 4098, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:44.447100', 'step': 4098, 'epoch': 1} {'type': 'loss', 'content': 0.223732590675354, 'timestamp': '2025-10-01 04:17:44.449095', 'step': 4099, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:44.482006', 'step': 4099, 'epoch': 1} {'type': 'loss', 'content': 0.1638982743024826, 'timestamp': '2025-10-01 04:17:44.505420', 'step': 4100, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:44.537615', 'step': 4100, 'epoch': 1} {'type': 'loss', 'content': 0.16179031133651733, 'timestamp': '2025-10-01 04:17:44.539861', 'step': 4101, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:17:44.583886', 'step': 4101, 'epoch': 1} {'type': 'loss', 'content': 0.15377092361450195, 'timestamp': '2025-10-01 04:17:44.586448', 'step': 4102, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:44.617410', 'step': 4102, 'epoch': 1} {'type': 'loss', 'content': 0.2107417732477188, 'timestamp': '2025-10-01 04:17:44.619543', 'step': 4103, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:17:44.655806', 'step': 4103, 'epoch': 1} {'type': 'loss', 'content': 0.11448121815919876, 'timestamp': '2025-10-01 04:17:44.679396', 'step': 4104, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:17:44.713400', 'step': 4104, 'epoch': 1} {'type': 'loss', 'content': 0.09685861319303513, 'timestamp': '2025-10-01 04:17:44.715517', 'step': 4105, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:17:44.746988', 'step': 4105, 'epoch': 1} {'type': 'loss', 'content': 0.1731543242931366, 'timestamp': '2025-10-01 04:17:44.748847', 'step': 4106, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:17:44.781734', 'step': 4106, 'epoch': 1} {'type': 'loss', 'content': 0.19681525230407715, 'timestamp': '2025-10-01 04:17:44.783674', 'step': 4107, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:44.814976', 'step': 4107, 'epoch': 1} {'type': 'loss', 'content': 0.1660478562116623, 'timestamp': '2025-10-01 04:17:44.838544', 'step': 4108, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:17:44.871334', 'step': 4108, 'epoch': 1} {'type': 'loss', 'content': 0.1693526804447174, 'timestamp': '2025-10-01 04:17:44.873351', 'step': 4109, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:17:44.906700', 'step': 4109, 'epoch': 1} {'type': 'loss', 'content': 0.13440662622451782, 'timestamp': '2025-10-01 04:17:44.908795', 'step': 4110, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:17:44.940875', 'step': 4110, 'epoch': 1} {'type': 'loss', 'content': 0.2712976336479187, 'timestamp': '2025-10-01 04:17:44.942789', 'step': 4111, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:17:44.975865', 'step': 4111, 'epoch': 1} {'type': 'loss', 'content': 0.1108526736497879, 'timestamp': '2025-10-01 04:17:44.999741', 'step': 4112, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:45.030807', 'step': 4112, 'epoch': 1} {'type': 'loss', 'content': 0.13902348279953003, 'timestamp': '2025-10-01 04:17:45.032579', 'step': 4113, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:17:45.068535', 'step': 4113, 'epoch': 1} {'type': 'loss', 'content': 0.14702634513378143, 'timestamp': '2025-10-01 04:17:45.071506', 'step': 4114, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:17:45.105849', 'step': 4114, 'epoch': 1} {'type': 'loss', 'content': 0.12976831197738647, 'timestamp': '2025-10-01 04:17:45.108114', 'step': 4115, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:17:45.141647', 'step': 4115, 'epoch': 1} {'type': 'loss', 'content': 0.0905611664056778, 'timestamp': '2025-10-01 04:17:45.165147', 'step': 4116, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:17:45.197627', 'step': 4116, 'epoch': 1} {'type': 'loss', 'content': 0.16971798241138458, 'timestamp': '2025-10-01 04:17:45.199659', 'step': 4117, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:17:45.230187', 'step': 4117, 'epoch': 1} {'type': 'loss', 'content': 0.10342250764369965, 'timestamp': '2025-10-01 04:17:45.233971', 'step': 4118, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:45.266252', 'step': 4118, 'epoch': 1} {'type': 'loss', 'content': 0.17501020431518555, 'timestamp': '2025-10-01 04:17:45.268136', 'step': 4119, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:45.298375', 'step': 4119, 'epoch': 1} {'type': 'loss', 'content': 0.1423405259847641, 'timestamp': '2025-10-01 04:17:45.321893', 'step': 4120, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:45.352089', 'step': 4120, 'epoch': 1} {'type': 'loss', 'content': 0.19137445092201233, 'timestamp': '2025-10-01 04:17:45.358405', 'step': 4121, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:17:45.389717', 'step': 4121, 'epoch': 1} {'type': 'loss', 'content': 0.14218397438526154, 'timestamp': '2025-10-01 04:17:45.391457', 'step': 4122, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:45.428796', 'step': 4122, 'epoch': 1} {'type': 'loss', 'content': 0.2101791501045227, 'timestamp': '2025-10-01 04:17:45.430762', 'step': 4123, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:45.462148', 'step': 4123, 'epoch': 1} {'type': 'loss', 'content': 0.19941309094429016, 'timestamp': '2025-10-01 04:17:45.486233', 'step': 4124, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:17:45.517497', 'step': 4124, 'epoch': 1} {'type': 'loss', 'content': 0.18675516545772552, 'timestamp': '2025-10-01 04:17:45.523267', 'step': 4125, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:45.554118', 'step': 4125, 'epoch': 1} {'type': 'loss', 'content': 0.2180376946926117, 'timestamp': '2025-10-01 04:17:45.555861', 'step': 4126, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:17:45.589061', 'step': 4126, 'epoch': 1} {'type': 'loss', 'content': 0.10279490798711777, 'timestamp': '2025-10-01 04:17:45.591695', 'step': 4127, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:17:45.628749', 'step': 4127, 'epoch': 1} {'type': 'loss', 'content': 0.13350817561149597, 'timestamp': '2025-10-01 04:17:45.657317', 'step': 4128, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:17:45.692968', 'step': 4128, 'epoch': 1} {'type': 'loss', 'content': 0.07973143458366394, 'timestamp': '2025-10-01 04:17:45.694941', 'step': 4129, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:17:45.730434', 'step': 4129, 'epoch': 1} {'type': 'loss', 'content': 0.17757296562194824, 'timestamp': '2025-10-01 04:17:45.733326', 'step': 4130, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:45.769928', 'step': 4130, 'epoch': 1} {'type': 'loss', 'content': 0.17743295431137085, 'timestamp': '2025-10-01 04:17:45.771944', 'step': 4131, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:17:45.813874', 'step': 4131, 'epoch': 1} {'type': 'loss', 'content': 0.1289568692445755, 'timestamp': '2025-10-01 04:17:45.837567', 'step': 4132, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:17:45.871767', 'step': 4132, 'epoch': 1} {'type': 'loss', 'content': 0.2404954433441162, 'timestamp': '2025-10-01 04:17:45.873767', 'step': 4133, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:17:45.905411', 'step': 4133, 'epoch': 1} {'type': 'loss', 'content': 0.1868768185377121, 'timestamp': '2025-10-01 04:17:45.907844', 'step': 4134, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:45.940684', 'step': 4134, 'epoch': 1} {'type': 'loss', 'content': 0.1763831526041031, 'timestamp': '2025-10-01 04:17:45.942644', 'step': 4135, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:45.975888', 'step': 4135, 'epoch': 1} {'type': 'loss', 'content': 0.20711392164230347, 'timestamp': '2025-10-01 04:17:45.999957', 'step': 4136, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:46.030902', 'step': 4136, 'epoch': 1} {'type': 'loss', 'content': 0.10392142832279205, 'timestamp': '2025-10-01 04:17:46.032801', 'step': 4137, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:17:46.064528', 'step': 4137, 'epoch': 1} {'type': 'loss', 'content': 0.18928314745426178, 'timestamp': '2025-10-01 04:17:46.066937', 'step': 4138, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:17:46.110507', 'step': 4138, 'epoch': 1} {'type': 'loss', 'content': 0.1985127180814743, 'timestamp': '2025-10-01 04:17:46.112480', 'step': 4139, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:46.149164', 'step': 4139, 'epoch': 1} {'type': 'loss', 'content': 0.11502496153116226, 'timestamp': '2025-10-01 04:17:46.172737', 'step': 4140, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:17:46.205747', 'step': 4140, 'epoch': 1} {'type': 'loss', 'content': 0.13095571100711823, 'timestamp': '2025-10-01 04:17:46.209898', 'step': 4141, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:46.245622', 'step': 4141, 'epoch': 1} {'type': 'loss', 'content': 0.08708856999874115, 'timestamp': '2025-10-01 04:17:46.253528', 'step': 4142, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:17:46.284551', 'step': 4142, 'epoch': 1} {'type': 'loss', 'content': 0.15424776077270508, 'timestamp': '2025-10-01 04:17:46.287342', 'step': 4143, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:46.318447', 'step': 4143, 'epoch': 1} {'type': 'loss', 'content': 0.155840203166008, 'timestamp': '2025-10-01 04:17:46.341949', 'step': 4144, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:17:46.378192', 'step': 4144, 'epoch': 1} {'type': 'loss', 'content': 0.15888871252536774, 'timestamp': '2025-10-01 04:17:46.380217', 'step': 4145, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:46.410415', 'step': 4145, 'epoch': 1} {'type': 'loss', 'content': 0.18821749091148376, 'timestamp': '2025-10-01 04:17:46.412460', 'step': 4146, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:17:46.445719', 'step': 4146, 'epoch': 1} {'type': 'loss', 'content': 0.19874168932437897, 'timestamp': '2025-10-01 04:17:46.448548', 'step': 4147, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:46.484678', 'step': 4147, 'epoch': 1} {'type': 'loss', 'content': 0.13570937514305115, 'timestamp': '2025-10-01 04:17:46.508418', 'step': 4148, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:17:46.540509', 'step': 4148, 'epoch': 1} {'type': 'loss', 'content': 0.09646832197904587, 'timestamp': '2025-10-01 04:17:46.542462', 'step': 4149, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:17:46.578163', 'step': 4149, 'epoch': 1} {'type': 'loss', 'content': 0.17148391902446747, 'timestamp': '2025-10-01 04:17:46.580592', 'step': 4150, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:17:46.616556', 'step': 4150, 'epoch': 1} {'type': 'loss', 'content': 0.17666016519069672, 'timestamp': '2025-10-01 04:17:46.618577', 'step': 4151, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:17:46.649468', 'step': 4151, 'epoch': 1} {'type': 'loss', 'content': 0.08570815622806549, 'timestamp': '2025-10-01 04:17:46.673638', 'step': 4152, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:17:46.719429', 'step': 4152, 'epoch': 1} {'type': 'loss', 'content': 0.17295260727405548, 'timestamp': '2025-10-01 04:17:46.721439', 'step': 4153, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:17:46.758531', 'step': 4153, 'epoch': 1} {'type': 'loss', 'content': 0.19773046672344208, 'timestamp': '2025-10-01 04:17:46.760623', 'step': 4154, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:46.796143', 'step': 4154, 'epoch': 1} {'type': 'loss', 'content': 0.06662508845329285, 'timestamp': '2025-10-01 04:17:46.798142', 'step': 4155, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:46.831245', 'step': 4155, 'epoch': 1} {'type': 'loss', 'content': 0.17075175046920776, 'timestamp': '2025-10-01 04:17:46.857507', 'step': 4156, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:17:46.893730', 'step': 4156, 'epoch': 1} {'type': 'loss', 'content': 0.21270574629306793, 'timestamp': '2025-10-01 04:17:46.895322', 'step': 4157, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:17:46.935692', 'step': 4157, 'epoch': 1} {'type': 'loss', 'content': 0.13679403066635132, 'timestamp': '2025-10-01 04:17:46.938499', 'step': 4158, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:46.969539', 'step': 4158, 'epoch': 1} {'type': 'loss', 'content': 0.19513645768165588, 'timestamp': '2025-10-01 04:17:46.971817', 'step': 4159, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:47.005922', 'step': 4159, 'epoch': 1} {'type': 'loss', 'content': 0.2072363793849945, 'timestamp': '2025-10-01 04:17:47.029398', 'step': 4160, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:47.060448', 'step': 4160, 'epoch': 1} {'type': 'loss', 'content': 0.11979050189256668, 'timestamp': '2025-10-01 04:17:47.062639', 'step': 4161, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:17:47.102099', 'step': 4161, 'epoch': 1} {'type': 'loss', 'content': 0.20016899704933167, 'timestamp': '2025-10-01 04:17:47.104440', 'step': 4162, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:47.152015', 'step': 4162, 'epoch': 1} {'type': 'loss', 'content': 0.16695596277713776, 'timestamp': '2025-10-01 04:17:47.154028', 'step': 4163, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:17:47.186175', 'step': 4163, 'epoch': 1} {'type': 'loss', 'content': 0.07914472371339798, 'timestamp': '2025-10-01 04:17:47.209844', 'step': 4164, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:47.244260', 'step': 4164, 'epoch': 1} {'type': 'loss', 'content': 0.14736780524253845, 'timestamp': '2025-10-01 04:17:47.246337', 'step': 4165, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:47.280827', 'step': 4165, 'epoch': 1} {'type': 'loss', 'content': 0.16870327293872833, 'timestamp': '2025-10-01 04:17:47.282741', 'step': 4166, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:47.314806', 'step': 4166, 'epoch': 1} {'type': 'loss', 'content': 0.14425073564052582, 'timestamp': '2025-10-01 04:17:47.316867', 'step': 4167, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:47.348126', 'step': 4167, 'epoch': 1} {'type': 'loss', 'content': 0.1046357974410057, 'timestamp': '2025-10-01 04:17:47.371587', 'step': 4168, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:17:47.406404', 'step': 4168, 'epoch': 1} {'type': 'loss', 'content': 0.1081916019320488, 'timestamp': '2025-10-01 04:17:47.408466', 'step': 4169, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:17:47.441045', 'step': 4169, 'epoch': 1} {'type': 'loss', 'content': 0.1169782504439354, 'timestamp': '2025-10-01 04:17:47.443421', 'step': 4170, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:17:47.477077', 'step': 4170, 'epoch': 1} {'type': 'loss', 'content': 0.12677958607673645, 'timestamp': '2025-10-01 04:17:47.479761', 'step': 4171, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:17:47.513068', 'step': 4171, 'epoch': 1} {'type': 'loss', 'content': 0.13065291941165924, 'timestamp': '2025-10-01 04:17:47.536736', 'step': 4172, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:17:47.570042', 'step': 4172, 'epoch': 1} {'type': 'loss', 'content': 0.15165114402770996, 'timestamp': '2025-10-01 04:17:47.572059', 'step': 4173, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:17:47.606102', 'step': 4173, 'epoch': 1} {'type': 'loss', 'content': 0.11587774008512497, 'timestamp': '2025-10-01 04:17:47.608106', 'step': 4174, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:47.645406', 'step': 4174, 'epoch': 1} {'type': 'loss', 'content': 0.0994749665260315, 'timestamp': '2025-10-01 04:17:47.647642', 'step': 4175, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:17:47.680211', 'step': 4175, 'epoch': 1} {'type': 'loss', 'content': 0.1005425900220871, 'timestamp': '2025-10-01 04:17:47.704545', 'step': 4176, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:47.745434', 'step': 4176, 'epoch': 1} {'type': 'loss', 'content': 0.18279623985290527, 'timestamp': '2025-10-01 04:17:47.747227', 'step': 4177, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:47.782479', 'step': 4177, 'epoch': 1} {'type': 'loss', 'content': 0.16338346898555756, 'timestamp': '2025-10-01 04:17:47.784462', 'step': 4178, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:17:47.815161', 'step': 4178, 'epoch': 1} {'type': 'loss', 'content': 0.18821878731250763, 'timestamp': '2025-10-01 04:17:47.817059', 'step': 4179, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:17:47.848092', 'step': 4179, 'epoch': 1} {'type': 'loss', 'content': 0.13663773238658905, 'timestamp': '2025-10-01 04:17:47.871542', 'step': 4180, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:47.902257', 'step': 4180, 'epoch': 1} {'type': 'loss', 'content': 0.05021115019917488, 'timestamp': '2025-10-01 04:17:47.904387', 'step': 4181, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:47.935708', 'step': 4181, 'epoch': 1} {'type': 'loss', 'content': 0.12906260788440704, 'timestamp': '2025-10-01 04:17:47.937745', 'step': 4182, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:47.971012', 'step': 4182, 'epoch': 1} {'type': 'loss', 'content': 0.21962353587150574, 'timestamp': '2025-10-01 04:17:47.973385', 'step': 4183, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:17:48.004576', 'step': 4183, 'epoch': 1} {'type': 'loss', 'content': 0.12722758948802948, 'timestamp': '2025-10-01 04:17:48.027960', 'step': 4184, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:17:48.058689', 'step': 4184, 'epoch': 1} {'type': 'loss', 'content': 0.1517452746629715, 'timestamp': '2025-10-01 04:17:48.060650', 'step': 4185, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:17:48.094681', 'step': 4185, 'epoch': 1} {'type': 'loss', 'content': 0.10387668758630753, 'timestamp': '2025-10-01 04:17:48.096475', 'step': 4186, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:48.129731', 'step': 4186, 'epoch': 1} {'type': 'loss', 'content': 0.20627029240131378, 'timestamp': '2025-10-01 04:17:48.131913', 'step': 4187, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:48.163221', 'step': 4187, 'epoch': 1} {'type': 'loss', 'content': 0.10123749077320099, 'timestamp': '2025-10-01 04:17:48.187143', 'step': 4188, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:17:48.221175', 'step': 4188, 'epoch': 1} {'type': 'loss', 'content': 0.09886141121387482, 'timestamp': '2025-10-01 04:17:48.223286', 'step': 4189, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:17:48.256796', 'step': 4189, 'epoch': 1} {'type': 'loss', 'content': 0.22394628822803497, 'timestamp': '2025-10-01 04:17:48.259095', 'step': 4190, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:48.294526', 'step': 4190, 'epoch': 1} {'type': 'loss', 'content': 0.1357613503932953, 'timestamp': '2025-10-01 04:17:48.296497', 'step': 4191, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:48.326854', 'step': 4191, 'epoch': 1} {'type': 'loss', 'content': 0.149240180850029, 'timestamp': '2025-10-01 04:17:48.350852', 'step': 4192, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:17:48.381627', 'step': 4192, 'epoch': 1} {'type': 'loss', 'content': 0.1601518988609314, 'timestamp': '2025-10-01 04:17:48.383650', 'step': 4193, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:17:48.416363', 'step': 4193, 'epoch': 1} {'type': 'loss', 'content': 0.21891610324382782, 'timestamp': '2025-10-01 04:17:48.418339', 'step': 4194, 'epoch': 1} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 949202279808}], 'timestamp': '2025-10-01 04:17:59.513536', 'step': 4194, 'epoch': 1} {'type': 'pplx', 'content': 8821.26945189792, 'timestamp': '2025-10-01 04:17:59.516749', 'step': 4194, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:17:59.547627', 'step': 4194, 'epoch': 1} {'type': 'loss', 'content': 0.18082880973815918, 'timestamp': '2025-10-01 04:17:59.549454', 'step': 4195, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:17:59.585412', 'step': 4195, 'epoch': 1} {'type': 'loss', 'content': 0.1243201419711113, 'timestamp': '2025-10-01 04:17:59.609407', 'step': 4196, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:59.643049', 'step': 4196, 'epoch': 1} {'type': 'loss', 'content': 0.13467000424861908, 'timestamp': '2025-10-01 04:17:59.646236', 'step': 4197, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:59.679222', 'step': 4197, 'epoch': 1} {'type': 'loss', 'content': 0.12718145549297333, 'timestamp': '2025-10-01 04:17:59.681333', 'step': 4198, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:17:59.712919', 'step': 4198, 'epoch': 1} {'type': 'loss', 'content': 0.11906956136226654, 'timestamp': '2025-10-01 04:17:59.715091', 'step': 4199, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:59.748451', 'step': 4199, 'epoch': 1} {'type': 'loss', 'content': 0.16826826333999634, 'timestamp': '2025-10-01 04:17:59.772365', 'step': 4200, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:59.812932', 'step': 4200, 'epoch': 1} {'type': 'loss', 'content': 0.1043902039527893, 'timestamp': '2025-10-01 04:17:59.814729', 'step': 4201, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:59.854446', 'step': 4201, 'epoch': 1} {'type': 'loss', 'content': 0.23141635954380035, 'timestamp': '2025-10-01 04:17:59.857397', 'step': 4202, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:17:59.889893', 'step': 4202, 'epoch': 1} {'type': 'loss', 'content': 0.20056018233299255, 'timestamp': '2025-10-01 04:17:59.892569', 'step': 4203, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:17:59.925677', 'step': 4203, 'epoch': 1} {'type': 'loss', 'content': 0.24584341049194336, 'timestamp': '2025-10-01 04:17:59.949223', 'step': 4204, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:17:59.983311', 'step': 4204, 'epoch': 1} {'type': 'loss', 'content': 0.13896967470645905, 'timestamp': '2025-10-01 04:17:59.985132', 'step': 4205, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:00.041580', 'step': 4205, 'epoch': 1} {'type': 'loss', 'content': 0.17320936918258667, 'timestamp': '2025-10-01 04:18:00.043370', 'step': 4206, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:00.073991', 'step': 4206, 'epoch': 1} {'type': 'loss', 'content': 0.14835937321186066, 'timestamp': '2025-10-01 04:18:00.076134', 'step': 4207, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:00.107538', 'step': 4207, 'epoch': 1} {'type': 'loss', 'content': 0.12999603152275085, 'timestamp': '2025-10-01 04:18:00.131016', 'step': 4208, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:00.162916', 'step': 4208, 'epoch': 1} {'type': 'loss', 'content': 0.2124742865562439, 'timestamp': '2025-10-01 04:18:00.164823', 'step': 4209, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:00.196287', 'step': 4209, 'epoch': 1} {'type': 'loss', 'content': 0.19424207508563995, 'timestamp': '2025-10-01 04:18:00.198179', 'step': 4210, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:00.230168', 'step': 4210, 'epoch': 1} {'type': 'loss', 'content': 0.12433721125125885, 'timestamp': '2025-10-01 04:18:00.232611', 'step': 4211, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:00.266242', 'step': 4211, 'epoch': 1} {'type': 'loss', 'content': 0.29134464263916016, 'timestamp': '2025-10-01 04:18:00.289737', 'step': 4212, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:00.340512', 'step': 4212, 'epoch': 1} {'type': 'loss', 'content': 0.11942646652460098, 'timestamp': '2025-10-01 04:18:00.343062', 'step': 4213, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:00.376302', 'step': 4213, 'epoch': 1} {'type': 'loss', 'content': 0.14911684393882751, 'timestamp': '2025-10-01 04:18:00.378568', 'step': 4214, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:00.435419', 'step': 4214, 'epoch': 1} {'type': 'loss', 'content': 0.18311072885990143, 'timestamp': '2025-10-01 04:18:00.437889', 'step': 4215, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:00.475281', 'step': 4215, 'epoch': 1} {'type': 'loss', 'content': 0.16553731262683868, 'timestamp': '2025-10-01 04:18:00.498933', 'step': 4216, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:00.532568', 'step': 4216, 'epoch': 1} {'type': 'loss', 'content': 0.15677215158939362, 'timestamp': '2025-10-01 04:18:00.535032', 'step': 4217, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:00.568981', 'step': 4217, 'epoch': 1} {'type': 'loss', 'content': 0.238072469830513, 'timestamp': '2025-10-01 04:18:00.571070', 'step': 4218, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:00.603075', 'step': 4218, 'epoch': 1} {'type': 'loss', 'content': 0.18031755089759827, 'timestamp': '2025-10-01 04:18:00.605040', 'step': 4219, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:18:00.648938', 'step': 4219, 'epoch': 1} {'type': 'loss', 'content': 0.14386877417564392, 'timestamp': '2025-10-01 04:18:00.672666', 'step': 4220, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:00.704453', 'step': 4220, 'epoch': 1} {'type': 'loss', 'content': 0.1017494797706604, 'timestamp': '2025-10-01 04:18:00.706500', 'step': 4221, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:00.740170', 'step': 4221, 'epoch': 1} {'type': 'loss', 'content': 0.10019083321094513, 'timestamp': '2025-10-01 04:18:00.743603', 'step': 4222, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:00.785732', 'step': 4222, 'epoch': 1} {'type': 'loss', 'content': 0.20421890914440155, 'timestamp': '2025-10-01 04:18:00.787937', 'step': 4223, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:00.824213', 'step': 4223, 'epoch': 1} {'type': 'loss', 'content': 0.19517111778259277, 'timestamp': '2025-10-01 04:18:00.847741', 'step': 4224, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:00.887160', 'step': 4224, 'epoch': 1} {'type': 'loss', 'content': 0.1216769739985466, 'timestamp': '2025-10-01 04:18:00.889075', 'step': 4225, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:18:00.924913', 'step': 4225, 'epoch': 1} {'type': 'loss', 'content': 0.21288475394248962, 'timestamp': '2025-10-01 04:18:00.927462', 'step': 4226, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:00.960625', 'step': 4226, 'epoch': 1} {'type': 'loss', 'content': 0.2546406686306, 'timestamp': '2025-10-01 04:18:00.962888', 'step': 4227, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:00.998085', 'step': 4227, 'epoch': 1} {'type': 'loss', 'content': 0.18452627956867218, 'timestamp': '2025-10-01 04:18:01.021975', 'step': 4228, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:18:01.055381', 'step': 4228, 'epoch': 1} {'type': 'loss', 'content': 0.08959493041038513, 'timestamp': '2025-10-01 04:18:01.057599', 'step': 4229, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:18:01.090048', 'step': 4229, 'epoch': 1} {'type': 'loss', 'content': 0.12139608711004257, 'timestamp': '2025-10-01 04:18:01.094562', 'step': 4230, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:18:01.126430', 'step': 4230, 'epoch': 1} {'type': 'loss', 'content': 0.11635943502187729, 'timestamp': '2025-10-01 04:18:01.128485', 'step': 4231, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:01.167119', 'step': 4231, 'epoch': 1} {'type': 'loss', 'content': 0.13437266647815704, 'timestamp': '2025-10-01 04:18:01.190326', 'step': 4232, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:01.224296', 'step': 4232, 'epoch': 1} {'type': 'loss', 'content': 0.22915032505989075, 'timestamp': '2025-10-01 04:18:01.239591', 'step': 4233, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:01.269900', 'step': 4233, 'epoch': 1} {'type': 'loss', 'content': 0.22756411135196686, 'timestamp': '2025-10-01 04:18:01.271873', 'step': 4234, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:18:01.303573', 'step': 4234, 'epoch': 1} {'type': 'loss', 'content': 0.28266602754592896, 'timestamp': '2025-10-01 04:18:01.306181', 'step': 4235, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:01.338071', 'step': 4235, 'epoch': 1} {'type': 'loss', 'content': 0.19792373478412628, 'timestamp': '2025-10-01 04:18:01.361856', 'step': 4236, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:01.399477', 'step': 4236, 'epoch': 1} {'type': 'loss', 'content': 0.17471133172512054, 'timestamp': '2025-10-01 04:18:01.401564', 'step': 4237, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:01.431812', 'step': 4237, 'epoch': 1} {'type': 'loss', 'content': 0.1285422295331955, 'timestamp': '2025-10-01 04:18:01.436216', 'step': 4238, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:01.471743', 'step': 4238, 'epoch': 1} {'type': 'loss', 'content': 0.16292797029018402, 'timestamp': '2025-10-01 04:18:01.474418', 'step': 4239, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:01.513287', 'step': 4239, 'epoch': 1} {'type': 'loss', 'content': 0.057828426361083984, 'timestamp': '2025-10-01 04:18:01.536925', 'step': 4240, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:18:01.568144', 'step': 4240, 'epoch': 1} {'type': 'loss', 'content': 0.14695514738559723, 'timestamp': '2025-10-01 04:18:01.570301', 'step': 4241, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:01.602744', 'step': 4241, 'epoch': 1} {'type': 'loss', 'content': 0.12272555381059647, 'timestamp': '2025-10-01 04:18:01.604986', 'step': 4242, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:18:01.644714', 'step': 4242, 'epoch': 1} {'type': 'loss', 'content': 0.22131980955600739, 'timestamp': '2025-10-01 04:18:01.647189', 'step': 4243, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:01.689961', 'step': 4243, 'epoch': 1} {'type': 'loss', 'content': 0.16089780628681183, 'timestamp': '2025-10-01 04:18:01.713787', 'step': 4244, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:01.745302', 'step': 4244, 'epoch': 1} {'type': 'loss', 'content': 0.11924944818019867, 'timestamp': '2025-10-01 04:18:01.750531', 'step': 4245, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:01.782511', 'step': 4245, 'epoch': 1} {'type': 'loss', 'content': 0.165483295917511, 'timestamp': '2025-10-01 04:18:01.784446', 'step': 4246, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:01.816485', 'step': 4246, 'epoch': 1} {'type': 'loss', 'content': 0.24294722080230713, 'timestamp': '2025-10-01 04:18:01.824055', 'step': 4247, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:18:01.861525', 'step': 4247, 'epoch': 1} {'type': 'loss', 'content': 0.16969555616378784, 'timestamp': '2025-10-01 04:18:01.885027', 'step': 4248, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:01.916696', 'step': 4248, 'epoch': 1} {'type': 'loss', 'content': 0.2386125922203064, 'timestamp': '2025-10-01 04:18:01.918781', 'step': 4249, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:18:01.963548', 'step': 4249, 'epoch': 1} {'type': 'loss', 'content': 0.29503506422042847, 'timestamp': '2025-10-01 04:18:01.966000', 'step': 4250, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:01.998283', 'step': 4250, 'epoch': 1} {'type': 'loss', 'content': 0.1840905100107193, 'timestamp': '2025-10-01 04:18:02.000254', 'step': 4251, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:02.036228', 'step': 4251, 'epoch': 1} {'type': 'loss', 'content': 0.16052229702472687, 'timestamp': '2025-10-01 04:18:02.059703', 'step': 4252, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:18:02.094989', 'step': 4252, 'epoch': 1} {'type': 'loss', 'content': 0.13669753074645996, 'timestamp': '2025-10-01 04:18:02.097163', 'step': 4253, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:02.141345', 'step': 4253, 'epoch': 1} {'type': 'loss', 'content': 0.17201431095600128, 'timestamp': '2025-10-01 04:18:02.143295', 'step': 4254, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:18:02.177477', 'step': 4254, 'epoch': 1} {'type': 'loss', 'content': 0.14706487953662872, 'timestamp': '2025-10-01 04:18:02.180941', 'step': 4255, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:18:02.218126', 'step': 4255, 'epoch': 1} {'type': 'loss', 'content': 0.10568635165691376, 'timestamp': '2025-10-01 04:18:02.243394', 'step': 4256, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:18:02.288297', 'step': 4256, 'epoch': 1} {'type': 'loss', 'content': 0.20162752270698547, 'timestamp': '2025-10-01 04:18:02.290472', 'step': 4257, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:02.329847', 'step': 4257, 'epoch': 1} {'type': 'loss', 'content': 0.277181476354599, 'timestamp': '2025-10-01 04:18:02.332387', 'step': 4258, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:18:02.380868', 'step': 4258, 'epoch': 1} {'type': 'loss', 'content': 0.1484505534172058, 'timestamp': '2025-10-01 04:18:02.382769', 'step': 4259, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:18:02.418158', 'step': 4259, 'epoch': 1} {'type': 'loss', 'content': 0.11474492400884628, 'timestamp': '2025-10-01 04:18:02.441639', 'step': 4260, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:18:02.476849', 'step': 4260, 'epoch': 1} {'type': 'loss', 'content': 0.17660586535930634, 'timestamp': '2025-10-01 04:18:02.478784', 'step': 4261, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:02.511887', 'step': 4261, 'epoch': 1} {'type': 'loss', 'content': 0.16215211153030396, 'timestamp': '2025-10-01 04:18:02.513884', 'step': 4262, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:02.545114', 'step': 4262, 'epoch': 1} {'type': 'loss', 'content': 0.1674676388502121, 'timestamp': '2025-10-01 04:18:02.547060', 'step': 4263, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:02.585312', 'step': 4263, 'epoch': 1} {'type': 'loss', 'content': 0.0903768241405487, 'timestamp': '2025-10-01 04:18:02.608719', 'step': 4264, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:02.652940', 'step': 4264, 'epoch': 1} {'type': 'loss', 'content': 0.12610045075416565, 'timestamp': '2025-10-01 04:18:02.654937', 'step': 4265, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:02.686291', 'step': 4265, 'epoch': 1} {'type': 'loss', 'content': 0.1934346854686737, 'timestamp': '2025-10-01 04:18:02.688274', 'step': 4266, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:18:02.720468', 'step': 4266, 'epoch': 1} {'type': 'loss', 'content': 0.14128738641738892, 'timestamp': '2025-10-01 04:18:02.723252', 'step': 4267, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:18:02.756265', 'step': 4267, 'epoch': 1} {'type': 'loss', 'content': 0.19049367308616638, 'timestamp': '2025-10-01 04:18:02.779830', 'step': 4268, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:18:02.813184', 'step': 4268, 'epoch': 1} {'type': 'loss', 'content': 0.19421403110027313, 'timestamp': '2025-10-01 04:18:02.817671', 'step': 4269, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:02.850572', 'step': 4269, 'epoch': 1} {'type': 'loss', 'content': 0.07985328137874603, 'timestamp': '2025-10-01 04:18:02.852590', 'step': 4270, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:02.892804', 'step': 4270, 'epoch': 1} {'type': 'loss', 'content': 0.14488789439201355, 'timestamp': '2025-10-01 04:18:02.894985', 'step': 4271, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:02.933963', 'step': 4271, 'epoch': 1} {'type': 'loss', 'content': 0.12749581038951874, 'timestamp': '2025-10-01 04:18:02.957622', 'step': 4272, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:02.990495', 'step': 4272, 'epoch': 1} {'type': 'loss', 'content': 0.1647322177886963, 'timestamp': '2025-10-01 04:18:02.993311', 'step': 4273, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:03.033991', 'step': 4273, 'epoch': 1} {'type': 'loss', 'content': 0.23762746155261993, 'timestamp': '2025-10-01 04:18:03.036007', 'step': 4274, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:18:03.079166', 'step': 4274, 'epoch': 1} {'type': 'loss', 'content': 0.13556942343711853, 'timestamp': '2025-10-01 04:18:03.081644', 'step': 4275, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:03.114245', 'step': 4275, 'epoch': 1} {'type': 'loss', 'content': 0.13934755325317383, 'timestamp': '2025-10-01 04:18:03.137613', 'step': 4276, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:03.178541', 'step': 4276, 'epoch': 1} {'type': 'loss', 'content': 0.10755788534879684, 'timestamp': '2025-10-01 04:18:03.180778', 'step': 4277, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:18:03.213175', 'step': 4277, 'epoch': 1} {'type': 'loss', 'content': 0.08317907154560089, 'timestamp': '2025-10-01 04:18:03.215498', 'step': 4278, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:18:03.247957', 'step': 4278, 'epoch': 1} {'type': 'loss', 'content': 0.1595313400030136, 'timestamp': '2025-10-01 04:18:03.250491', 'step': 4279, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:03.289838', 'step': 4279, 'epoch': 1} {'type': 'loss', 'content': 0.2161133587360382, 'timestamp': '2025-10-01 04:18:03.313300', 'step': 4280, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:03.346170', 'step': 4280, 'epoch': 1} {'type': 'loss', 'content': 0.13549606502056122, 'timestamp': '2025-10-01 04:18:03.348083', 'step': 4281, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:03.381476', 'step': 4281, 'epoch': 1} {'type': 'loss', 'content': 0.0936107188463211, 'timestamp': '2025-10-01 04:18:03.383738', 'step': 4282, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:18:03.428811', 'step': 4282, 'epoch': 1} {'type': 'loss', 'content': 0.1315639317035675, 'timestamp': '2025-10-01 04:18:03.431256', 'step': 4283, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:18:03.465345', 'step': 4283, 'epoch': 1} {'type': 'loss', 'content': 0.1958756148815155, 'timestamp': '2025-10-01 04:18:03.489192', 'step': 4284, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:03.521585', 'step': 4284, 'epoch': 1} {'type': 'loss', 'content': 0.1621837615966797, 'timestamp': '2025-10-01 04:18:03.523655', 'step': 4285, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:03.556023', 'step': 4285, 'epoch': 1} {'type': 'loss', 'content': 0.18610930442810059, 'timestamp': '2025-10-01 04:18:03.558087', 'step': 4286, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:18:03.588981', 'step': 4286, 'epoch': 1} {'type': 'loss', 'content': 0.17085273563861847, 'timestamp': '2025-10-01 04:18:03.598185', 'step': 4287, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:03.632608', 'step': 4287, 'epoch': 1} {'type': 'loss', 'content': 0.16349531710147858, 'timestamp': '2025-10-01 04:18:03.655827', 'step': 4288, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:03.694198', 'step': 4288, 'epoch': 1} {'type': 'loss', 'content': 0.19538414478302002, 'timestamp': '2025-10-01 04:18:03.696120', 'step': 4289, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:03.727661', 'step': 4289, 'epoch': 1} {'type': 'loss', 'content': 0.23346376419067383, 'timestamp': '2025-10-01 04:18:03.729655', 'step': 4290, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:03.761686', 'step': 4290, 'epoch': 1} {'type': 'loss', 'content': 0.13909858465194702, 'timestamp': '2025-10-01 04:18:03.772298', 'step': 4291, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:03.805192', 'step': 4291, 'epoch': 1} {'type': 'loss', 'content': 0.16287264227867126, 'timestamp': '2025-10-01 04:18:03.829140', 'step': 4292, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:18:03.861564', 'step': 4292, 'epoch': 1} {'type': 'loss', 'content': 0.26901423931121826, 'timestamp': '2025-10-01 04:18:03.864154', 'step': 4293, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:18:03.897150', 'step': 4293, 'epoch': 1} {'type': 'loss', 'content': 0.11904411762952805, 'timestamp': '2025-10-01 04:18:03.899410', 'step': 4294, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:18:03.933557', 'step': 4294, 'epoch': 1} {'type': 'loss', 'content': 0.12703189253807068, 'timestamp': '2025-10-01 04:18:03.936231', 'step': 4295, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:18:03.968998', 'step': 4295, 'epoch': 1} {'type': 'loss', 'content': 0.15976977348327637, 'timestamp': '2025-10-01 04:18:03.993439', 'step': 4296, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:04.024698', 'step': 4296, 'epoch': 1} {'type': 'loss', 'content': 0.1708516925573349, 'timestamp': '2025-10-01 04:18:04.027784', 'step': 4297, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:18:04.060876', 'step': 4297, 'epoch': 1} {'type': 'loss', 'content': 0.11727268993854523, 'timestamp': '2025-10-01 04:18:04.063304', 'step': 4298, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:04.103131', 'step': 4298, 'epoch': 1} {'type': 'loss', 'content': 0.24666175246238708, 'timestamp': '2025-10-01 04:18:04.105507', 'step': 4299, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:04.136176', 'step': 4299, 'epoch': 1} {'type': 'loss', 'content': 0.15476427972316742, 'timestamp': '2025-10-01 04:18:04.159782', 'step': 4300, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:18:04.204315', 'step': 4300, 'epoch': 1} {'type': 'loss', 'content': 0.2023463398218155, 'timestamp': '2025-10-01 04:18:04.206345', 'step': 4301, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:18:04.237389', 'step': 4301, 'epoch': 1} {'type': 'loss', 'content': 0.1041761115193367, 'timestamp': '2025-10-01 04:18:04.239361', 'step': 4302, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:04.270618', 'step': 4302, 'epoch': 1} {'type': 'loss', 'content': 0.17256321012973785, 'timestamp': '2025-10-01 04:18:04.272636', 'step': 4303, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:04.308457', 'step': 4303, 'epoch': 1} {'type': 'loss', 'content': 0.11726254969835281, 'timestamp': '2025-10-01 04:18:04.331895', 'step': 4304, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:04.364066', 'step': 4304, 'epoch': 1} {'type': 'loss', 'content': 0.18408846855163574, 'timestamp': '2025-10-01 04:18:04.366025', 'step': 4305, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:18:04.405995', 'step': 4305, 'epoch': 1} {'type': 'loss', 'content': 0.0951966717839241, 'timestamp': '2025-10-01 04:18:04.408834', 'step': 4306, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:18:04.440683', 'step': 4306, 'epoch': 1} {'type': 'loss', 'content': 0.11666475236415863, 'timestamp': '2025-10-01 04:18:04.443674', 'step': 4307, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:18:04.476358', 'step': 4307, 'epoch': 1} {'type': 'loss', 'content': 0.17837120592594147, 'timestamp': '2025-10-01 04:18:04.500318', 'step': 4308, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:04.533083', 'step': 4308, 'epoch': 1} {'type': 'loss', 'content': 0.14503951370716095, 'timestamp': '2025-10-01 04:18:04.535006', 'step': 4309, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:04.566771', 'step': 4309, 'epoch': 1} {'type': 'loss', 'content': 0.1921236217021942, 'timestamp': '2025-10-01 04:18:04.568940', 'step': 4310, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:04.600556', 'step': 4310, 'epoch': 1} {'type': 'loss', 'content': 0.060624949634075165, 'timestamp': '2025-10-01 04:18:04.603223', 'step': 4311, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:04.646175', 'step': 4311, 'epoch': 1} {'type': 'loss', 'content': 0.1883619725704193, 'timestamp': '2025-10-01 04:18:04.669598', 'step': 4312, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:04.702464', 'step': 4312, 'epoch': 1} {'type': 'loss', 'content': 0.11865489184856415, 'timestamp': '2025-10-01 04:18:04.704450', 'step': 4313, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:04.744521', 'step': 4313, 'epoch': 1} {'type': 'loss', 'content': 0.19768032431602478, 'timestamp': '2025-10-01 04:18:04.746366', 'step': 4314, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:18:04.791508', 'step': 4314, 'epoch': 1} {'type': 'loss', 'content': 0.2266610860824585, 'timestamp': '2025-10-01 04:18:04.793817', 'step': 4315, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:04.832594', 'step': 4315, 'epoch': 1} {'type': 'loss', 'content': 0.11242015659809113, 'timestamp': '2025-10-01 04:18:04.856169', 'step': 4316, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:18:04.889682', 'step': 4316, 'epoch': 1} {'type': 'loss', 'content': 0.2854886054992676, 'timestamp': '2025-10-01 04:18:04.891955', 'step': 4317, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:18:04.925324', 'step': 4317, 'epoch': 1} {'type': 'loss', 'content': 0.11348680406808853, 'timestamp': '2025-10-01 04:18:04.927389', 'step': 4318, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:04.974549', 'step': 4318, 'epoch': 1} {'type': 'loss', 'content': 0.1447628289461136, 'timestamp': '2025-10-01 04:18:04.976585', 'step': 4319, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:05.012242', 'step': 4319, 'epoch': 1} {'type': 'loss', 'content': 0.21889738738536835, 'timestamp': '2025-10-01 04:18:05.035684', 'step': 4320, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:05.076543', 'step': 4320, 'epoch': 1} {'type': 'loss', 'content': 0.11887350678443909, 'timestamp': '2025-10-01 04:18:05.078969', 'step': 4321, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:05.121885', 'step': 4321, 'epoch': 1} {'type': 'loss', 'content': 0.2027890384197235, 'timestamp': '2025-10-01 04:18:05.123647', 'step': 4322, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:05.158766', 'step': 4322, 'epoch': 1} {'type': 'loss', 'content': 0.22420573234558105, 'timestamp': '2025-10-01 04:18:05.160815', 'step': 4323, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:18:05.193324', 'step': 4323, 'epoch': 1} {'type': 'loss', 'content': 0.2024092823266983, 'timestamp': '2025-10-01 04:18:05.217087', 'step': 4324, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:18:05.251930', 'step': 4324, 'epoch': 1} {'type': 'loss', 'content': 0.11641626805067062, 'timestamp': '2025-10-01 04:18:05.253703', 'step': 4325, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:05.286111', 'step': 4325, 'epoch': 1} {'type': 'loss', 'content': 0.1410864293575287, 'timestamp': '2025-10-01 04:18:05.288109', 'step': 4326, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:05.323290', 'step': 4326, 'epoch': 1} {'type': 'loss', 'content': 0.30293646454811096, 'timestamp': '2025-10-01 04:18:05.325145', 'step': 4327, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:05.360691', 'step': 4327, 'epoch': 1} {'type': 'loss', 'content': 0.15311217308044434, 'timestamp': '2025-10-01 04:18:05.384175', 'step': 4328, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:05.416544', 'step': 4328, 'epoch': 1} {'type': 'loss', 'content': 0.14538098871707916, 'timestamp': '2025-10-01 04:18:05.420135', 'step': 4329, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:18:05.462031', 'step': 4329, 'epoch': 1} {'type': 'loss', 'content': 0.20484410226345062, 'timestamp': '2025-10-01 04:18:05.464156', 'step': 4330, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:05.498194', 'step': 4330, 'epoch': 1} {'type': 'loss', 'content': 0.19122722744941711, 'timestamp': '2025-10-01 04:18:05.500227', 'step': 4331, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:05.542989', 'step': 4331, 'epoch': 1} {'type': 'loss', 'content': 0.09797905385494232, 'timestamp': '2025-10-01 04:18:05.566464', 'step': 4332, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:05.606706', 'step': 4332, 'epoch': 1} {'type': 'loss', 'content': 0.14873161911964417, 'timestamp': '2025-10-01 04:18:05.608596', 'step': 4333, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:05.638621', 'step': 4333, 'epoch': 1} {'type': 'loss', 'content': 0.07365597039461136, 'timestamp': '2025-10-01 04:18:05.644419', 'step': 4334, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:05.683235', 'step': 4334, 'epoch': 1} {'type': 'loss', 'content': 0.11316925287246704, 'timestamp': '2025-10-01 04:18:05.685867', 'step': 4335, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:18:05.717604', 'step': 4335, 'epoch': 1} {'type': 'loss', 'content': 0.19808031618595123, 'timestamp': '2025-10-01 04:18:05.740907', 'step': 4336, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:05.772570', 'step': 4336, 'epoch': 1} {'type': 'loss', 'content': 0.12592676281929016, 'timestamp': '2025-10-01 04:18:05.786000', 'step': 4337, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:05.826540', 'step': 4337, 'epoch': 1} {'type': 'loss', 'content': 0.20113596320152283, 'timestamp': '2025-10-01 04:18:05.850415', 'step': 4338, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:05.896583', 'step': 4338, 'epoch': 1} {'type': 'loss', 'content': 0.17421475052833557, 'timestamp': '2025-10-01 04:18:05.917352', 'step': 4339, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:05.964691', 'step': 4339, 'epoch': 1} {'type': 'loss', 'content': 0.11717204004526138, 'timestamp': '2025-10-01 04:18:05.996554', 'step': 4340, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:18:06.051955', 'step': 4340, 'epoch': 1} {'type': 'loss', 'content': 0.13747601211071014, 'timestamp': '2025-10-01 04:18:06.060078', 'step': 4341, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:18:06.109879', 'step': 4341, 'epoch': 1} {'type': 'loss', 'content': 0.14886769652366638, 'timestamp': '2025-10-01 04:18:06.118129', 'step': 4342, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:06.166664', 'step': 4342, 'epoch': 1} {'type': 'loss', 'content': 0.16908107697963715, 'timestamp': '2025-10-01 04:18:06.191898', 'step': 4343, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:06.234203', 'step': 4343, 'epoch': 1} {'type': 'loss', 'content': 0.19402670860290527, 'timestamp': '2025-10-01 04:18:06.288603', 'step': 4344, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:06.335526', 'step': 4344, 'epoch': 1} {'type': 'loss', 'content': 0.09471949189901352, 'timestamp': '2025-10-01 04:18:06.348982', 'step': 4345, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:06.388343', 'step': 4345, 'epoch': 1} {'type': 'loss', 'content': 0.13728784024715424, 'timestamp': '2025-10-01 04:18:06.399902', 'step': 4346, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:18:06.455070', 'step': 4346, 'epoch': 1} {'type': 'loss', 'content': 0.21173717081546783, 'timestamp': '2025-10-01 04:18:06.500253', 'step': 4347, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:06.540180', 'step': 4347, 'epoch': 1} {'type': 'loss', 'content': 0.17907880246639252, 'timestamp': '2025-10-01 04:18:06.578976', 'step': 4348, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:18:06.635834', 'step': 4348, 'epoch': 1} {'type': 'loss', 'content': 0.16452641785144806, 'timestamp': '2025-10-01 04:18:06.653319', 'step': 4349, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:06.705880', 'step': 4349, 'epoch': 1} {'type': 'loss', 'content': 0.2152482569217682, 'timestamp': '2025-10-01 04:18:06.716107', 'step': 4350, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:18:06.767597', 'step': 4350, 'epoch': 1} {'type': 'loss', 'content': 0.13218532502651215, 'timestamp': '2025-10-01 04:18:06.782144', 'step': 4351, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:06.823514', 'step': 4351, 'epoch': 1} {'type': 'loss', 'content': 0.18821296095848083, 'timestamp': '2025-10-01 04:18:06.849550', 'step': 4352, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:06.889813', 'step': 4352, 'epoch': 1} {'type': 'loss', 'content': 0.11853288859128952, 'timestamp': '2025-10-01 04:18:06.908924', 'step': 4353, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:06.943568', 'step': 4353, 'epoch': 1} {'type': 'loss', 'content': 0.1928642988204956, 'timestamp': '2025-10-01 04:18:06.964423', 'step': 4354, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:07.005710', 'step': 4354, 'epoch': 1} {'type': 'loss', 'content': 0.2396804243326187, 'timestamp': '2025-10-01 04:18:07.016601', 'step': 4355, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:18:07.063991', 'step': 4355, 'epoch': 1} {'type': 'loss', 'content': 0.06641549617052078, 'timestamp': '2025-10-01 04:18:07.110056', 'step': 4356, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:07.167544', 'step': 4356, 'epoch': 1} {'type': 'loss', 'content': 0.24182158708572388, 'timestamp': '2025-10-01 04:18:07.169269', 'step': 4357, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:07.200861', 'step': 4357, 'epoch': 1} {'type': 'loss', 'content': 0.18551774322986603, 'timestamp': '2025-10-01 04:18:07.204908', 'step': 4358, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:07.251818', 'step': 4358, 'epoch': 1} {'type': 'loss', 'content': 0.22226694226264954, 'timestamp': '2025-10-01 04:18:07.254983', 'step': 4359, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:18:07.298204', 'step': 4359, 'epoch': 1} {'type': 'loss', 'content': 0.049314457923173904, 'timestamp': '2025-10-01 04:18:07.327309', 'step': 4360, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:18:07.359627', 'step': 4360, 'epoch': 1} {'type': 'loss', 'content': 0.09541831910610199, 'timestamp': '2025-10-01 04:18:07.362300', 'step': 4361, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:18:07.403145', 'step': 4361, 'epoch': 1} {'type': 'loss', 'content': 0.23691067099571228, 'timestamp': '2025-10-01 04:18:07.405383', 'step': 4362, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:07.444406', 'step': 4362, 'epoch': 1} {'type': 'loss', 'content': 0.08809105306863785, 'timestamp': '2025-10-01 04:18:07.446214', 'step': 4363, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:18:07.482036', 'step': 4363, 'epoch': 1} {'type': 'loss', 'content': 0.17533409595489502, 'timestamp': '2025-10-01 04:18:07.505829', 'step': 4364, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:07.561550', 'step': 4364, 'epoch': 1} {'type': 'loss', 'content': 0.16340431571006775, 'timestamp': '2025-10-01 04:18:07.564020', 'step': 4365, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:07.599664', 'step': 4365, 'epoch': 1} {'type': 'loss', 'content': 0.15526439249515533, 'timestamp': '2025-10-01 04:18:07.601431', 'step': 4366, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:07.647817', 'step': 4366, 'epoch': 1} {'type': 'loss', 'content': 0.13900479674339294, 'timestamp': '2025-10-01 04:18:07.651318', 'step': 4367, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:18:07.693352', 'step': 4367, 'epoch': 1} {'type': 'loss', 'content': 0.2796361446380615, 'timestamp': '2025-10-01 04:18:07.724553', 'step': 4368, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:07.780565', 'step': 4368, 'epoch': 1} {'type': 'loss', 'content': 0.1362556368112564, 'timestamp': '2025-10-01 04:18:07.783489', 'step': 4369, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:18:07.815272', 'step': 4369, 'epoch': 1} {'type': 'loss', 'content': 0.29283544421195984, 'timestamp': '2025-10-01 04:18:07.817865', 'step': 4370, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:07.865441', 'step': 4370, 'epoch': 1} {'type': 'loss', 'content': 0.17644739151000977, 'timestamp': '2025-10-01 04:18:07.867363', 'step': 4371, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:18:07.906940', 'step': 4371, 'epoch': 1} {'type': 'loss', 'content': 0.2444227635860443, 'timestamp': '2025-10-01 04:18:07.931732', 'step': 4372, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:07.979362', 'step': 4372, 'epoch': 1} {'type': 'loss', 'content': 0.14935272932052612, 'timestamp': '2025-10-01 04:18:07.985738', 'step': 4373, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:18:08.018165', 'step': 4373, 'epoch': 1} {'type': 'loss', 'content': 0.2610709071159363, 'timestamp': '2025-10-01 04:18:08.022192', 'step': 4374, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:18:08.054700', 'step': 4374, 'epoch': 1} {'type': 'loss', 'content': 0.2606898248195648, 'timestamp': '2025-10-01 04:18:08.057994', 'step': 4375, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:08.092698', 'step': 4375, 'epoch': 1} {'type': 'loss', 'content': 0.12895825505256653, 'timestamp': '2025-10-01 04:18:08.116586', 'step': 4376, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:18:08.149208', 'step': 4376, 'epoch': 1} {'type': 'loss', 'content': 0.09692377597093582, 'timestamp': '2025-10-01 04:18:08.151550', 'step': 4377, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:08.193571', 'step': 4377, 'epoch': 1} {'type': 'loss', 'content': 0.121879942715168, 'timestamp': '2025-10-01 04:18:08.196311', 'step': 4378, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:18:08.230792', 'step': 4378, 'epoch': 1} {'type': 'loss', 'content': 0.186903178691864, 'timestamp': '2025-10-01 04:18:08.233276', 'step': 4379, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:08.265312', 'step': 4379, 'epoch': 1} {'type': 'loss', 'content': 0.11386559903621674, 'timestamp': '2025-10-01 04:18:08.289138', 'step': 4380, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:18:08.320947', 'step': 4380, 'epoch': 1} {'type': 'loss', 'content': 0.22994613647460938, 'timestamp': '2025-10-01 04:18:08.322909', 'step': 4381, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:08.354820', 'step': 4381, 'epoch': 1} {'type': 'loss', 'content': 0.23356851935386658, 'timestamp': '2025-10-01 04:18:08.356739', 'step': 4382, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:08.387268', 'step': 4382, 'epoch': 1} {'type': 'loss', 'content': 0.14341606199741364, 'timestamp': '2025-10-01 04:18:08.389242', 'step': 4383, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:08.425255', 'step': 4383, 'epoch': 1} {'type': 'loss', 'content': 0.1321655511856079, 'timestamp': '2025-10-01 04:18:08.448733', 'step': 4384, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:08.480680', 'step': 4384, 'epoch': 1} {'type': 'loss', 'content': 0.1740870624780655, 'timestamp': '2025-10-01 04:18:08.482604', 'step': 4385, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:08.524605', 'step': 4385, 'epoch': 1} {'type': 'loss', 'content': 0.158463716506958, 'timestamp': '2025-10-01 04:18:08.526515', 'step': 4386, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:08.564722', 'step': 4386, 'epoch': 1} {'type': 'loss', 'content': 0.20188800990581512, 'timestamp': '2025-10-01 04:18:08.567072', 'step': 4387, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:08.602562', 'step': 4387, 'epoch': 1} {'type': 'loss', 'content': 0.0809631422162056, 'timestamp': '2025-10-01 04:18:08.631906', 'step': 4388, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:08.663409', 'step': 4388, 'epoch': 1} {'type': 'loss', 'content': 0.1507321298122406, 'timestamp': '2025-10-01 04:18:08.665655', 'step': 4389, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:08.696796', 'step': 4389, 'epoch': 1} {'type': 'loss', 'content': 0.14302751421928406, 'timestamp': '2025-10-01 04:18:08.698876', 'step': 4390, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:18:08.752794', 'step': 4390, 'epoch': 1} {'type': 'loss', 'content': 0.16397938132286072, 'timestamp': '2025-10-01 04:18:08.754805', 'step': 4391, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:18:08.786572', 'step': 4391, 'epoch': 1} {'type': 'loss', 'content': 0.17326077818870544, 'timestamp': '2025-10-01 04:18:08.810011', 'step': 4392, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:08.849329', 'step': 4392, 'epoch': 1} {'type': 'loss', 'content': 0.1013747900724411, 'timestamp': '2025-10-01 04:18:08.856301', 'step': 4393, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:08.895058', 'step': 4393, 'epoch': 1} {'type': 'loss', 'content': 0.15731261670589447, 'timestamp': '2025-10-01 04:18:08.897174', 'step': 4394, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:08.931388', 'step': 4394, 'epoch': 1} {'type': 'loss', 'content': 0.1491023451089859, 'timestamp': '2025-10-01 04:18:08.933347', 'step': 4395, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:08.966168', 'step': 4395, 'epoch': 1} {'type': 'loss', 'content': 0.2119622677564621, 'timestamp': '2025-10-01 04:18:08.989681', 'step': 4396, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:18:09.021694', 'step': 4396, 'epoch': 1} {'type': 'loss', 'content': 0.15328261256217957, 'timestamp': '2025-10-01 04:18:09.023496', 'step': 4397, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:09.056161', 'step': 4397, 'epoch': 1} {'type': 'loss', 'content': 0.15506477653980255, 'timestamp': '2025-10-01 04:18:09.061898', 'step': 4398, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:09.099891', 'step': 4398, 'epoch': 1} {'type': 'loss', 'content': 0.12360978871583939, 'timestamp': '2025-10-01 04:18:09.101975', 'step': 4399, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:09.141003', 'step': 4399, 'epoch': 1} {'type': 'loss', 'content': 0.17549167573451996, 'timestamp': '2025-10-01 04:18:09.164440', 'step': 4400, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:09.196006', 'step': 4400, 'epoch': 1} {'type': 'loss', 'content': 0.14021916687488556, 'timestamp': '2025-10-01 04:18:09.198006', 'step': 4401, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:18:09.238458', 'step': 4401, 'epoch': 1} {'type': 'loss', 'content': 0.25740933418273926, 'timestamp': '2025-10-01 04:18:09.240570', 'step': 4402, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:18:09.278840', 'step': 4402, 'epoch': 1} {'type': 'loss', 'content': 0.22406771779060364, 'timestamp': '2025-10-01 04:18:09.281032', 'step': 4403, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:09.315353', 'step': 4403, 'epoch': 1} {'type': 'loss', 'content': 0.15885183215141296, 'timestamp': '2025-10-01 04:18:09.339075', 'step': 4404, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:09.378940', 'step': 4404, 'epoch': 1} {'type': 'loss', 'content': 0.08735489100217819, 'timestamp': '2025-10-01 04:18:09.381147', 'step': 4405, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:18:09.419464', 'step': 4405, 'epoch': 1} {'type': 'loss', 'content': 0.15580570697784424, 'timestamp': '2025-10-01 04:18:09.421596', 'step': 4406, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:09.453274', 'step': 4406, 'epoch': 1} {'type': 'loss', 'content': 0.08611641079187393, 'timestamp': '2025-10-01 04:18:09.455276', 'step': 4407, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:18:09.487247', 'step': 4407, 'epoch': 1} {'type': 'loss', 'content': 0.15242785215377808, 'timestamp': '2025-10-01 04:18:09.510692', 'step': 4408, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:18:09.544690', 'step': 4408, 'epoch': 1} {'type': 'loss', 'content': 0.09996902942657471, 'timestamp': '2025-10-01 04:18:09.546743', 'step': 4409, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:09.578889', 'step': 4409, 'epoch': 1} {'type': 'loss', 'content': 0.1780862659215927, 'timestamp': '2025-10-01 04:18:09.581074', 'step': 4410, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:18:09.627011', 'step': 4410, 'epoch': 1} {'type': 'loss', 'content': 0.20384380221366882, 'timestamp': '2025-10-01 04:18:09.629932', 'step': 4411, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:18:09.673715', 'step': 4411, 'epoch': 1} {'type': 'loss', 'content': 0.14854857325553894, 'timestamp': '2025-10-01 04:18:09.697738', 'step': 4412, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:18:09.730694', 'step': 4412, 'epoch': 1} {'type': 'loss', 'content': 0.15335296094417572, 'timestamp': '2025-10-01 04:18:09.732818', 'step': 4413, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:09.774506', 'step': 4413, 'epoch': 1} {'type': 'loss', 'content': 0.24218197166919708, 'timestamp': '2025-10-01 04:18:09.776376', 'step': 4414, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:09.811590', 'step': 4414, 'epoch': 1} {'type': 'loss', 'content': 0.09341078996658325, 'timestamp': '2025-10-01 04:18:09.814635', 'step': 4415, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:09.850460', 'step': 4415, 'epoch': 1} {'type': 'loss', 'content': 0.16192354261875153, 'timestamp': '2025-10-01 04:18:09.874405', 'step': 4416, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:09.914046', 'step': 4416, 'epoch': 1} {'type': 'loss', 'content': 0.11397068947553635, 'timestamp': '2025-10-01 04:18:09.916247', 'step': 4417, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:09.956885', 'step': 4417, 'epoch': 1} {'type': 'loss', 'content': 0.16950641572475433, 'timestamp': '2025-10-01 04:18:09.959979', 'step': 4418, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:10.005839', 'step': 4418, 'epoch': 1} {'type': 'loss', 'content': 0.11693364381790161, 'timestamp': '2025-10-01 04:18:10.008175', 'step': 4419, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:10.041800', 'step': 4419, 'epoch': 1} {'type': 'loss', 'content': 0.20324008166790009, 'timestamp': '2025-10-01 04:18:10.065350', 'step': 4420, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:10.113349', 'step': 4420, 'epoch': 1} {'type': 'loss', 'content': 0.14988690614700317, 'timestamp': '2025-10-01 04:18:10.116464', 'step': 4421, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:18:10.155269', 'step': 4421, 'epoch': 1} {'type': 'loss', 'content': 0.18579627573490143, 'timestamp': '2025-10-01 04:18:10.164462', 'step': 4422, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:10.205945', 'step': 4422, 'epoch': 1} {'type': 'loss', 'content': 0.25393742322921753, 'timestamp': '2025-10-01 04:18:10.208094', 'step': 4423, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:18:10.257794', 'step': 4423, 'epoch': 1} {'type': 'loss', 'content': 0.14136625826358795, 'timestamp': '2025-10-01 04:18:10.281262', 'step': 4424, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:10.317546', 'step': 4424, 'epoch': 1} {'type': 'loss', 'content': 0.17905768752098083, 'timestamp': '2025-10-01 04:18:10.320912', 'step': 4425, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:10.355301', 'step': 4425, 'epoch': 1} {'type': 'loss', 'content': 0.19030515849590302, 'timestamp': '2025-10-01 04:18:10.357743', 'step': 4426, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:18:10.397671', 'step': 4426, 'epoch': 1} {'type': 'loss', 'content': 0.18975920975208282, 'timestamp': '2025-10-01 04:18:10.407240', 'step': 4427, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:10.444075', 'step': 4427, 'epoch': 1} {'type': 'loss', 'content': 0.07985831052064896, 'timestamp': '2025-10-01 04:18:10.467842', 'step': 4428, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:10.509933', 'step': 4428, 'epoch': 1} {'type': 'loss', 'content': 0.19292491674423218, 'timestamp': '2025-10-01 04:18:10.512136', 'step': 4429, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:10.565520', 'step': 4429, 'epoch': 1} {'type': 'loss', 'content': 0.16294121742248535, 'timestamp': '2025-10-01 04:18:10.567477', 'step': 4430, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:18:10.601111', 'step': 4430, 'epoch': 1} {'type': 'loss', 'content': 0.16275638341903687, 'timestamp': '2025-10-01 04:18:10.603330', 'step': 4431, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:10.637463', 'step': 4431, 'epoch': 1} {'type': 'loss', 'content': 0.241176038980484, 'timestamp': '2025-10-01 04:18:10.661250', 'step': 4432, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:10.700310', 'step': 4432, 'epoch': 1} {'type': 'loss', 'content': 0.1731427013874054, 'timestamp': '2025-10-01 04:18:10.702174', 'step': 4433, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:10.750504', 'step': 4433, 'epoch': 1} {'type': 'loss', 'content': 0.14590567350387573, 'timestamp': '2025-10-01 04:18:10.759389', 'step': 4434, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:10.796514', 'step': 4434, 'epoch': 1} {'type': 'loss', 'content': 0.2695585787296295, 'timestamp': '2025-10-01 04:18:10.798852', 'step': 4435, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:10.842122', 'step': 4435, 'epoch': 1} {'type': 'loss', 'content': 0.07051345705986023, 'timestamp': '2025-10-01 04:18:10.866518', 'step': 4436, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:10.898878', 'step': 4436, 'epoch': 1} {'type': 'loss', 'content': 0.22318267822265625, 'timestamp': '2025-10-01 04:18:10.900907', 'step': 4437, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:10.932297', 'step': 4437, 'epoch': 1} {'type': 'loss', 'content': 0.10106895864009857, 'timestamp': '2025-10-01 04:18:10.934355', 'step': 4438, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:18:10.966804', 'step': 4438, 'epoch': 1} {'type': 'loss', 'content': 0.14817453920841217, 'timestamp': '2025-10-01 04:18:10.968872', 'step': 4439, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:11.007232', 'step': 4439, 'epoch': 1} {'type': 'loss', 'content': 0.14291630685329437, 'timestamp': '2025-10-01 04:18:11.030742', 'step': 4440, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:18:11.065876', 'step': 4440, 'epoch': 1} {'type': 'loss', 'content': 0.3743095099925995, 'timestamp': '2025-10-01 04:18:11.068193', 'step': 4441, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:11.100198', 'step': 4441, 'epoch': 1} {'type': 'loss', 'content': 0.14446349442005157, 'timestamp': '2025-10-01 04:18:11.102237', 'step': 4442, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:11.151966', 'step': 4442, 'epoch': 1} {'type': 'loss', 'content': 0.20231418311595917, 'timestamp': '2025-10-01 04:18:11.153836', 'step': 4443, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:11.200685', 'step': 4443, 'epoch': 1} {'type': 'loss', 'content': 0.24350450932979584, 'timestamp': '2025-10-01 04:18:11.224241', 'step': 4444, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:18:11.278707', 'step': 4444, 'epoch': 1} {'type': 'loss', 'content': 0.16007252037525177, 'timestamp': '2025-10-01 04:18:11.287070', 'step': 4445, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:11.329698', 'step': 4445, 'epoch': 1} {'type': 'loss', 'content': 0.12219314277172089, 'timestamp': '2025-10-01 04:18:11.331745', 'step': 4446, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:11.368699', 'step': 4446, 'epoch': 1} {'type': 'loss', 'content': 0.0953713208436966, 'timestamp': '2025-10-01 04:18:11.370765', 'step': 4447, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:11.402805', 'step': 4447, 'epoch': 1} {'type': 'loss', 'content': 0.2230829894542694, 'timestamp': '2025-10-01 04:18:11.426052', 'step': 4448, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:18:11.464254', 'step': 4448, 'epoch': 1} {'type': 'loss', 'content': 0.09307866543531418, 'timestamp': '2025-10-01 04:18:11.466356', 'step': 4449, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:18:11.500608', 'step': 4449, 'epoch': 1} {'type': 'loss', 'content': 0.2742321491241455, 'timestamp': '2025-10-01 04:18:11.502749', 'step': 4450, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:11.535966', 'step': 4450, 'epoch': 1} {'type': 'loss', 'content': 0.20080143213272095, 'timestamp': '2025-10-01 04:18:11.537884', 'step': 4451, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:18:11.586331', 'step': 4451, 'epoch': 1} {'type': 'loss', 'content': 0.15874777734279633, 'timestamp': '2025-10-01 04:18:11.610172', 'step': 4452, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:11.654842', 'step': 4452, 'epoch': 1} {'type': 'loss', 'content': 0.14425650238990784, 'timestamp': '2025-10-01 04:18:11.656874', 'step': 4453, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:11.700242', 'step': 4453, 'epoch': 1} {'type': 'loss', 'content': 0.14789308607578278, 'timestamp': '2025-10-01 04:18:11.702106', 'step': 4454, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:18:11.740348', 'step': 4454, 'epoch': 1} {'type': 'loss', 'content': 0.24079737067222595, 'timestamp': '2025-10-01 04:18:11.742211', 'step': 4455, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:11.777681', 'step': 4455, 'epoch': 1} {'type': 'loss', 'content': 0.11909487843513489, 'timestamp': '2025-10-01 04:18:11.801079', 'step': 4456, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:11.839986', 'step': 4456, 'epoch': 1} {'type': 'loss', 'content': 0.09829100966453552, 'timestamp': '2025-10-01 04:18:11.842010', 'step': 4457, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:11.876273', 'step': 4457, 'epoch': 1} {'type': 'loss', 'content': 0.1552739292383194, 'timestamp': '2025-10-01 04:18:11.878218', 'step': 4458, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:11.913021', 'step': 4458, 'epoch': 1} {'type': 'loss', 'content': 0.09131699055433273, 'timestamp': '2025-10-01 04:18:11.914754', 'step': 4459, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:11.959341', 'step': 4459, 'epoch': 1} {'type': 'loss', 'content': 0.2002497762441635, 'timestamp': '2025-10-01 04:18:11.982794', 'step': 4460, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:12.040865', 'step': 4460, 'epoch': 1} {'type': 'loss', 'content': 0.15538790822029114, 'timestamp': '2025-10-01 04:18:12.043052', 'step': 4461, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:12.087435', 'step': 4461, 'epoch': 1} {'type': 'loss', 'content': 0.06904108822345734, 'timestamp': '2025-10-01 04:18:12.089479', 'step': 4462, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:12.121327', 'step': 4462, 'epoch': 1} {'type': 'loss', 'content': 0.19149918854236603, 'timestamp': '2025-10-01 04:18:12.123250', 'step': 4463, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:12.155539', 'step': 4463, 'epoch': 1} {'type': 'loss', 'content': 0.22139880061149597, 'timestamp': '2025-10-01 04:18:12.178683', 'step': 4464, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:12.216959', 'step': 4464, 'epoch': 1} {'type': 'loss', 'content': 0.12744778394699097, 'timestamp': '2025-10-01 04:18:12.220115', 'step': 4465, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:12.254171', 'step': 4465, 'epoch': 1} {'type': 'loss', 'content': 0.19736900925636292, 'timestamp': '2025-10-01 04:18:12.256830', 'step': 4466, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:18:12.297473', 'step': 4466, 'epoch': 1} {'type': 'loss', 'content': 0.1657601296901703, 'timestamp': '2025-10-01 04:18:12.299851', 'step': 4467, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-10-01 04:18:12.333923', 'step': 4467, 'epoch': 1} {'type': 'loss', 'content': 0.13909012079238892, 'timestamp': '2025-10-01 04:18:12.361935', 'step': 4468, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:12.395972', 'step': 4468, 'epoch': 1} {'type': 'loss', 'content': 0.10328958183526993, 'timestamp': '2025-10-01 04:18:12.397540', 'step': 4469, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:18:12.435110', 'step': 4469, 'epoch': 1} {'type': 'loss', 'content': 0.13922609388828278, 'timestamp': '2025-10-01 04:18:12.436886', 'step': 4470, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:12.476882', 'step': 4470, 'epoch': 1} {'type': 'loss', 'content': 0.23172327876091003, 'timestamp': '2025-10-01 04:18:12.480216', 'step': 4471, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:12.513851', 'step': 4471, 'epoch': 1} {'type': 'loss', 'content': 0.12049079686403275, 'timestamp': '2025-10-01 04:18:12.537051', 'step': 4472, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:12.568899', 'step': 4472, 'epoch': 1} {'type': 'loss', 'content': 0.20319777727127075, 'timestamp': '2025-10-01 04:18:12.571031', 'step': 4473, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:18:12.610016', 'step': 4473, 'epoch': 1} {'type': 'loss', 'content': 0.0720352903008461, 'timestamp': '2025-10-01 04:18:12.613377', 'step': 4474, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:12.647712', 'step': 4474, 'epoch': 1} {'type': 'loss', 'content': 0.21160128712654114, 'timestamp': '2025-10-01 04:18:12.649820', 'step': 4475, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:18:12.693133', 'step': 4475, 'epoch': 1} {'type': 'loss', 'content': 0.15361207723617554, 'timestamp': '2025-10-01 04:18:12.716886', 'step': 4476, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:12.750224', 'step': 4476, 'epoch': 1} {'type': 'loss', 'content': 0.11683395504951477, 'timestamp': '2025-10-01 04:18:12.752197', 'step': 4477, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:12.784554', 'step': 4477, 'epoch': 1} {'type': 'loss', 'content': 0.10220684111118317, 'timestamp': '2025-10-01 04:18:12.786538', 'step': 4478, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:12.820802', 'step': 4478, 'epoch': 1} {'type': 'loss', 'content': 0.08992331475019455, 'timestamp': '2025-10-01 04:18:12.822964', 'step': 4479, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:18:12.854435', 'step': 4479, 'epoch': 1} {'type': 'loss', 'content': 0.17542539536952972, 'timestamp': '2025-10-01 04:18:12.877978', 'step': 4480, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:12.917030', 'step': 4480, 'epoch': 1} {'type': 'loss', 'content': 0.1861400008201599, 'timestamp': '2025-10-01 04:18:12.919526', 'step': 4481, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:12.951625', 'step': 4481, 'epoch': 1} {'type': 'loss', 'content': 0.16117620468139648, 'timestamp': '2025-10-01 04:18:12.953548', 'step': 4482, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:12.985578', 'step': 4482, 'epoch': 1} {'type': 'loss', 'content': 0.11506035178899765, 'timestamp': '2025-10-01 04:18:12.988098', 'step': 4483, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:13.024581', 'step': 4483, 'epoch': 1} {'type': 'loss', 'content': 0.13995212316513062, 'timestamp': '2025-10-01 04:18:13.056342', 'step': 4484, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:13.093167', 'step': 4484, 'epoch': 1} {'type': 'loss', 'content': 0.08678188174962997, 'timestamp': '2025-10-01 04:18:13.095041', 'step': 4485, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:13.126924', 'step': 4485, 'epoch': 1} {'type': 'loss', 'content': 0.21571217477321625, 'timestamp': '2025-10-01 04:18:13.128575', 'step': 4486, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:18:13.159851', 'step': 4486, 'epoch': 1} {'type': 'loss', 'content': 0.16870814561843872, 'timestamp': '2025-10-01 04:18:13.161813', 'step': 4487, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:18:13.198067', 'step': 4487, 'epoch': 1} {'type': 'loss', 'content': 0.1089303269982338, 'timestamp': '2025-10-01 04:18:13.221317', 'step': 4488, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:13.251530', 'step': 4488, 'epoch': 1} {'type': 'loss', 'content': 0.1515210121870041, 'timestamp': '2025-10-01 04:18:13.253305', 'step': 4489, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:18:13.285142', 'step': 4489, 'epoch': 1} {'type': 'loss', 'content': 0.10295099020004272, 'timestamp': '2025-10-01 04:18:13.287892', 'step': 4490, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:13.321980', 'step': 4490, 'epoch': 1} {'type': 'loss', 'content': 0.16857489943504333, 'timestamp': '2025-10-01 04:18:13.324113', 'step': 4491, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:13.358636', 'step': 4491, 'epoch': 1} {'type': 'loss', 'content': 0.14780710637569427, 'timestamp': '2025-10-01 04:18:13.382166', 'step': 4492, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:13.424208', 'step': 4492, 'epoch': 1} {'type': 'loss', 'content': 0.14588744938373566, 'timestamp': '2025-10-01 04:18:13.426428', 'step': 4493, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:18:13.461757', 'step': 4493, 'epoch': 1} {'type': 'loss', 'content': 0.10919910669326782, 'timestamp': '2025-10-01 04:18:13.463199', 'step': 4494, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:13.503667', 'step': 4494, 'epoch': 1} {'type': 'loss', 'content': 0.12014129012823105, 'timestamp': '2025-10-01 04:18:13.505890', 'step': 4495, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:13.549403', 'step': 4495, 'epoch': 1} {'type': 'loss', 'content': 0.1562400907278061, 'timestamp': '2025-10-01 04:18:13.572870', 'step': 4496, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:18:13.616110', 'step': 4496, 'epoch': 1} {'type': 'loss', 'content': 0.28915271162986755, 'timestamp': '2025-10-01 04:18:13.618197', 'step': 4497, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:13.650812', 'step': 4497, 'epoch': 1} {'type': 'loss', 'content': 0.14860348403453827, 'timestamp': '2025-10-01 04:18:13.652941', 'step': 4498, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:13.689730', 'step': 4498, 'epoch': 1} {'type': 'loss', 'content': 0.20919935405254364, 'timestamp': '2025-10-01 04:18:13.699358', 'step': 4499, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:18:13.731231', 'step': 4499, 'epoch': 1} {'type': 'loss', 'content': 0.10515201836824417, 'timestamp': '2025-10-01 04:18:13.754716', 'step': 4500, 'epoch': 1} {'type': 'info', 'content': 'Checkpoint saved at step 4500', 'timestamp': '2025-10-01 04:18:18.684553', 'step': 4500, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:18.717386', 'step': 4500, 'epoch': 1} {'type': 'loss', 'content': 0.16491413116455078, 'timestamp': '2025-10-01 04:18:18.720053', 'step': 4501, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:18:18.751622', 'step': 4501, 'epoch': 1} {'type': 'loss', 'content': 0.10076682269573212, 'timestamp': '2025-10-01 04:18:18.753090', 'step': 4502, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:18.783698', 'step': 4502, 'epoch': 1} {'type': 'loss', 'content': 0.265916109085083, 'timestamp': '2025-10-01 04:18:18.785736', 'step': 4503, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:18.817275', 'step': 4503, 'epoch': 1} {'type': 'loss', 'content': 0.15324510633945465, 'timestamp': '2025-10-01 04:18:18.841009', 'step': 4504, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:18.874870', 'step': 4504, 'epoch': 1} {'type': 'loss', 'content': 0.2640543580055237, 'timestamp': '2025-10-01 04:18:18.881141', 'step': 4505, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:18.918191', 'step': 4505, 'epoch': 1} {'type': 'loss', 'content': 0.09747020900249481, 'timestamp': '2025-10-01 04:18:18.920119', 'step': 4506, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:18.951294', 'step': 4506, 'epoch': 1} {'type': 'loss', 'content': 0.23803579807281494, 'timestamp': '2025-10-01 04:18:18.953195', 'step': 4507, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:18.987623', 'step': 4507, 'epoch': 1} {'type': 'loss', 'content': 0.20332907140254974, 'timestamp': '2025-10-01 04:18:19.011156', 'step': 4508, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:18:19.051315', 'step': 4508, 'epoch': 1} {'type': 'loss', 'content': 0.17279356718063354, 'timestamp': '2025-10-01 04:18:19.052798', 'step': 4509, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:19.087186', 'step': 4509, 'epoch': 1} {'type': 'loss', 'content': 0.16645197570323944, 'timestamp': '2025-10-01 04:18:19.088795', 'step': 4510, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:18:19.119355', 'step': 4510, 'epoch': 1} {'type': 'loss', 'content': 0.1685909479856491, 'timestamp': '2025-10-01 04:18:19.121670', 'step': 4511, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:18:19.153310', 'step': 4511, 'epoch': 1} {'type': 'loss', 'content': 0.19162507355213165, 'timestamp': '2025-10-01 04:18:19.176494', 'step': 4512, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:19.212862', 'step': 4512, 'epoch': 1} {'type': 'loss', 'content': 0.18459370732307434, 'timestamp': '2025-10-01 04:18:19.214746', 'step': 4513, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:19.245269', 'step': 4513, 'epoch': 1} {'type': 'loss', 'content': 0.22400443255901337, 'timestamp': '2025-10-01 04:18:19.247391', 'step': 4514, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:19.296598', 'step': 4514, 'epoch': 1} {'type': 'loss', 'content': 0.16036038100719452, 'timestamp': '2025-10-01 04:18:19.303875', 'step': 4515, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:19.334066', 'step': 4515, 'epoch': 1} {'type': 'loss', 'content': 0.13042877614498138, 'timestamp': '2025-10-01 04:18:19.360445', 'step': 4516, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:19.392862', 'step': 4516, 'epoch': 1} {'type': 'loss', 'content': 0.11641311645507812, 'timestamp': '2025-10-01 04:18:19.394550', 'step': 4517, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:19.427384', 'step': 4517, 'epoch': 1} {'type': 'loss', 'content': 0.18283611536026, 'timestamp': '2025-10-01 04:18:19.429418', 'step': 4518, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:19.462847', 'step': 4518, 'epoch': 1} {'type': 'loss', 'content': 0.13442979753017426, 'timestamp': '2025-10-01 04:18:19.464801', 'step': 4519, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:18:19.495930', 'step': 4519, 'epoch': 1} {'type': 'loss', 'content': 0.15178586542606354, 'timestamp': '2025-10-01 04:18:19.519859', 'step': 4520, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:18:19.549943', 'step': 4520, 'epoch': 1} {'type': 'loss', 'content': 0.19955770671367645, 'timestamp': '2025-10-01 04:18:19.551414', 'step': 4521, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:19.584208', 'step': 4521, 'epoch': 1} {'type': 'loss', 'content': 0.1848500669002533, 'timestamp': '2025-10-01 04:18:19.586216', 'step': 4522, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:19.624844', 'step': 4522, 'epoch': 1} {'type': 'loss', 'content': 0.1037704348564148, 'timestamp': '2025-10-01 04:18:19.626759', 'step': 4523, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:19.658973', 'step': 4523, 'epoch': 1} {'type': 'loss', 'content': 0.1410001516342163, 'timestamp': '2025-10-01 04:18:19.682236', 'step': 4524, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:18:19.716045', 'step': 4524, 'epoch': 1} {'type': 'loss', 'content': 0.12424791604280472, 'timestamp': '2025-10-01 04:18:19.717992', 'step': 4525, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:19.751980', 'step': 4525, 'epoch': 1} {'type': 'loss', 'content': 0.17714375257492065, 'timestamp': '2025-10-01 04:18:19.753747', 'step': 4526, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:18:19.788129', 'step': 4526, 'epoch': 1} {'type': 'loss', 'content': 0.15825550258159637, 'timestamp': '2025-10-01 04:18:19.790075', 'step': 4527, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:18:19.822149', 'step': 4527, 'epoch': 1} {'type': 'loss', 'content': 0.1092446818947792, 'timestamp': '2025-10-01 04:18:19.845830', 'step': 4528, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:18:19.878263', 'step': 4528, 'epoch': 1} {'type': 'loss', 'content': 0.18829955160617828, 'timestamp': '2025-10-01 04:18:19.880223', 'step': 4529, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:18:19.914937', 'step': 4529, 'epoch': 1} {'type': 'loss', 'content': 0.13937102258205414, 'timestamp': '2025-10-01 04:18:19.918458', 'step': 4530, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:18:19.949623', 'step': 4530, 'epoch': 1} {'type': 'loss', 'content': 0.15449252724647522, 'timestamp': '2025-10-01 04:18:19.952007', 'step': 4531, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:18:19.985437', 'step': 4531, 'epoch': 1} {'type': 'loss', 'content': 0.19414761662483215, 'timestamp': '2025-10-01 04:18:20.013931', 'step': 4532, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:18:20.058280', 'step': 4532, 'epoch': 1} {'type': 'loss', 'content': 0.11796677857637405, 'timestamp': '2025-10-01 04:18:20.061303', 'step': 4533, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:20.094116', 'step': 4533, 'epoch': 1} {'type': 'loss', 'content': 0.3254022002220154, 'timestamp': '2025-10-01 04:18:20.095898', 'step': 4534, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:20.131673', 'step': 4534, 'epoch': 1} {'type': 'loss', 'content': 0.15248391032218933, 'timestamp': '2025-10-01 04:18:20.133446', 'step': 4535, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:20.165572', 'step': 4535, 'epoch': 1} {'type': 'loss', 'content': 0.17270830273628235, 'timestamp': '2025-10-01 04:18:20.189881', 'step': 4536, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:20.220744', 'step': 4536, 'epoch': 1} {'type': 'loss', 'content': 0.18798774480819702, 'timestamp': '2025-10-01 04:18:20.222467', 'step': 4537, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:18:20.264681', 'step': 4537, 'epoch': 1} {'type': 'loss', 'content': 0.18252986669540405, 'timestamp': '2025-10-01 04:18:20.266145', 'step': 4538, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:20.297700', 'step': 4538, 'epoch': 1} {'type': 'loss', 'content': 0.13195540010929108, 'timestamp': '2025-10-01 04:18:20.299463', 'step': 4539, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:20.333713', 'step': 4539, 'epoch': 1} {'type': 'loss', 'content': 0.1725742369890213, 'timestamp': '2025-10-01 04:18:20.357020', 'step': 4540, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:20.390824', 'step': 4540, 'epoch': 1} {'type': 'loss', 'content': 0.19138003885746002, 'timestamp': '2025-10-01 04:18:20.393214', 'step': 4541, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:18:20.425351', 'step': 4541, 'epoch': 1} {'type': 'loss', 'content': 0.11950460821390152, 'timestamp': '2025-10-01 04:18:20.429644', 'step': 4542, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:20.460308', 'step': 4542, 'epoch': 1} {'type': 'loss', 'content': 0.23356573283672333, 'timestamp': '2025-10-01 04:18:20.461915', 'step': 4543, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:20.495221', 'step': 4543, 'epoch': 1} {'type': 'loss', 'content': 0.08381030708551407, 'timestamp': '2025-10-01 04:18:20.518761', 'step': 4544, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:20.550915', 'step': 4544, 'epoch': 1} {'type': 'loss', 'content': 0.27777299284935, 'timestamp': '2025-10-01 04:18:20.552436', 'step': 4545, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:20.595751', 'step': 4545, 'epoch': 1} {'type': 'loss', 'content': 0.09953282028436661, 'timestamp': '2025-10-01 04:18:20.599071', 'step': 4546, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:20.632202', 'step': 4546, 'epoch': 1} {'type': 'loss', 'content': 0.13629741966724396, 'timestamp': '2025-10-01 04:18:20.633915', 'step': 4547, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:20.674930', 'step': 4547, 'epoch': 1} {'type': 'loss', 'content': 0.17798441648483276, 'timestamp': '2025-10-01 04:18:20.698391', 'step': 4548, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:18:20.735681', 'step': 4548, 'epoch': 1} {'type': 'loss', 'content': 0.13969789445400238, 'timestamp': '2025-10-01 04:18:20.737545', 'step': 4549, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:18:20.768387', 'step': 4549, 'epoch': 1} {'type': 'loss', 'content': 0.23209264874458313, 'timestamp': '2025-10-01 04:18:20.770096', 'step': 4550, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:20.807190', 'step': 4550, 'epoch': 1} {'type': 'loss', 'content': 0.05398545786738396, 'timestamp': '2025-10-01 04:18:20.809108', 'step': 4551, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:18:20.840285', 'step': 4551, 'epoch': 1} {'type': 'loss', 'content': 0.13417215645313263, 'timestamp': '2025-10-01 04:18:20.864304', 'step': 4552, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:20.906867', 'step': 4552, 'epoch': 1} {'type': 'loss', 'content': 0.14556066691875458, 'timestamp': '2025-10-01 04:18:20.908590', 'step': 4553, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:20.947230', 'step': 4553, 'epoch': 1} {'type': 'loss', 'content': 0.15221485495567322, 'timestamp': '2025-10-01 04:18:20.949191', 'step': 4554, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:20.983845', 'step': 4554, 'epoch': 1} {'type': 'loss', 'content': 0.16764910519123077, 'timestamp': '2025-10-01 04:18:20.985754', 'step': 4555, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:18:21.021749', 'step': 4555, 'epoch': 1} {'type': 'loss', 'content': 0.0712328851222992, 'timestamp': '2025-10-01 04:18:21.045425', 'step': 4556, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:21.089905', 'step': 4556, 'epoch': 1} {'type': 'loss', 'content': 0.13203005492687225, 'timestamp': '2025-10-01 04:18:21.091663', 'step': 4557, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:18:21.140363', 'step': 4557, 'epoch': 1} {'type': 'loss', 'content': 0.1100556030869484, 'timestamp': '2025-10-01 04:18:21.142778', 'step': 4558, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:18:21.173911', 'step': 4558, 'epoch': 1} {'type': 'loss', 'content': 0.1255735605955124, 'timestamp': '2025-10-01 04:18:21.176313', 'step': 4559, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:21.218914', 'step': 4559, 'epoch': 1} {'type': 'loss', 'content': 0.14283859729766846, 'timestamp': '2025-10-01 04:18:21.242412', 'step': 4560, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:21.273839', 'step': 4560, 'epoch': 1} {'type': 'loss', 'content': 0.11661753058433533, 'timestamp': '2025-10-01 04:18:21.275604', 'step': 4561, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:21.307417', 'step': 4561, 'epoch': 1} {'type': 'loss', 'content': 0.1389426290988922, 'timestamp': '2025-10-01 04:18:21.309433', 'step': 4562, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:21.340546', 'step': 4562, 'epoch': 1} {'type': 'loss', 'content': 0.12147622555494308, 'timestamp': '2025-10-01 04:18:21.342577', 'step': 4563, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:21.387175', 'step': 4563, 'epoch': 1} {'type': 'loss', 'content': 0.16080182790756226, 'timestamp': '2025-10-01 04:18:21.410557', 'step': 4564, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:21.445129', 'step': 4564, 'epoch': 1} {'type': 'loss', 'content': 0.12494722008705139, 'timestamp': '2025-10-01 04:18:21.447022', 'step': 4565, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:21.482809', 'step': 4565, 'epoch': 1} {'type': 'loss', 'content': 0.09039907157421112, 'timestamp': '2025-10-01 04:18:21.484554', 'step': 4566, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:21.528342', 'step': 4566, 'epoch': 1} {'type': 'loss', 'content': 0.18539361655712128, 'timestamp': '2025-10-01 04:18:21.531589', 'step': 4567, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:21.565675', 'step': 4567, 'epoch': 1} {'type': 'loss', 'content': 0.12655065953731537, 'timestamp': '2025-10-01 04:18:21.588872', 'step': 4568, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:21.622122', 'step': 4568, 'epoch': 1} {'type': 'loss', 'content': 0.18938963115215302, 'timestamp': '2025-10-01 04:18:21.624604', 'step': 4569, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:21.658308', 'step': 4569, 'epoch': 1} {'type': 'loss', 'content': 0.2565794885158539, 'timestamp': '2025-10-01 04:18:21.667547', 'step': 4570, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:21.698792', 'step': 4570, 'epoch': 1} {'type': 'loss', 'content': 0.1483936309814453, 'timestamp': '2025-10-01 04:18:21.700519', 'step': 4571, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:21.732992', 'step': 4571, 'epoch': 1} {'type': 'loss', 'content': 0.16435404121875763, 'timestamp': '2025-10-01 04:18:21.756251', 'step': 4572, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:18:21.791053', 'step': 4572, 'epoch': 1} {'type': 'loss', 'content': 0.19138231873512268, 'timestamp': '2025-10-01 04:18:21.792747', 'step': 4573, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:18:21.826702', 'step': 4573, 'epoch': 1} {'type': 'loss', 'content': 0.09689341485500336, 'timestamp': '2025-10-01 04:18:21.829128', 'step': 4574, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:21.859986', 'step': 4574, 'epoch': 1} {'type': 'loss', 'content': 0.15427781641483307, 'timestamp': '2025-10-01 04:18:21.862423', 'step': 4575, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:21.892774', 'step': 4575, 'epoch': 1} {'type': 'loss', 'content': 0.16549170017242432, 'timestamp': '2025-10-01 04:18:21.916312', 'step': 4576, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:18:21.947747', 'step': 4576, 'epoch': 1} {'type': 'loss', 'content': 0.18163765966892242, 'timestamp': '2025-10-01 04:18:21.949793', 'step': 4577, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:21.993544', 'step': 4577, 'epoch': 1} {'type': 'loss', 'content': 0.17246069014072418, 'timestamp': '2025-10-01 04:18:21.996695', 'step': 4578, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:18:22.029088', 'step': 4578, 'epoch': 1} {'type': 'loss', 'content': 0.09952571988105774, 'timestamp': '2025-10-01 04:18:22.031667', 'step': 4579, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:22.072798', 'step': 4579, 'epoch': 1} {'type': 'loss', 'content': 0.13404786586761475, 'timestamp': '2025-10-01 04:18:22.098997', 'step': 4580, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:22.141502', 'step': 4580, 'epoch': 1} {'type': 'loss', 'content': 0.15760265290737152, 'timestamp': '2025-10-01 04:18:22.143264', 'step': 4581, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:18:22.201739', 'step': 4581, 'epoch': 1} {'type': 'loss', 'content': 0.16381530463695526, 'timestamp': '2025-10-01 04:18:22.204091', 'step': 4582, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:18:22.240473', 'step': 4582, 'epoch': 1} {'type': 'loss', 'content': 0.23022229969501495, 'timestamp': '2025-10-01 04:18:22.242862', 'step': 4583, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:18:22.277189', 'step': 4583, 'epoch': 1} {'type': 'loss', 'content': 0.09440798312425613, 'timestamp': '2025-10-01 04:18:22.300470', 'step': 4584, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:22.331583', 'step': 4584, 'epoch': 1} {'type': 'loss', 'content': 0.18473033607006073, 'timestamp': '2025-10-01 04:18:22.333485', 'step': 4585, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:22.371104', 'step': 4585, 'epoch': 1} {'type': 'loss', 'content': 0.11382043361663818, 'timestamp': '2025-10-01 04:18:22.373051', 'step': 4586, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:22.404789', 'step': 4586, 'epoch': 1} {'type': 'loss', 'content': 0.10794997960329056, 'timestamp': '2025-10-01 04:18:22.406695', 'step': 4587, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:22.441536', 'step': 4587, 'epoch': 1} {'type': 'loss', 'content': 0.18222643435001373, 'timestamp': '2025-10-01 04:18:22.464712', 'step': 4588, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:18:22.497586', 'step': 4588, 'epoch': 1} {'type': 'loss', 'content': 0.12247880548238754, 'timestamp': '2025-10-01 04:18:22.499370', 'step': 4589, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:18:22.538012', 'step': 4589, 'epoch': 1} {'type': 'loss', 'content': 0.10721208155155182, 'timestamp': '2025-10-01 04:18:22.539750', 'step': 4590, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:22.571175', 'step': 4590, 'epoch': 1} {'type': 'loss', 'content': 0.12562046945095062, 'timestamp': '2025-10-01 04:18:22.573008', 'step': 4591, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:18:22.609043', 'step': 4591, 'epoch': 1} {'type': 'loss', 'content': 0.16599281132221222, 'timestamp': '2025-10-01 04:18:22.634164', 'step': 4592, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:22.674408', 'step': 4592, 'epoch': 1} {'type': 'loss', 'content': 0.1318279206752777, 'timestamp': '2025-10-01 04:18:22.676353', 'step': 4593, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:18:22.709806', 'step': 4593, 'epoch': 1} {'type': 'loss', 'content': 0.2202429622411728, 'timestamp': '2025-10-01 04:18:22.712071', 'step': 4594, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:18:22.748006', 'step': 4594, 'epoch': 1} {'type': 'loss', 'content': 0.18215352296829224, 'timestamp': '2025-10-01 04:18:22.749877', 'step': 4595, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:18:22.781498', 'step': 4595, 'epoch': 1} {'type': 'loss', 'content': 0.11593541502952576, 'timestamp': '2025-10-01 04:18:22.804773', 'step': 4596, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:18:22.844447', 'step': 4596, 'epoch': 1} {'type': 'loss', 'content': 0.16409695148468018, 'timestamp': '2025-10-01 04:18:22.846034', 'step': 4597, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:18:22.876809', 'step': 4597, 'epoch': 1} {'type': 'loss', 'content': 0.08990717679262161, 'timestamp': '2025-10-01 04:18:22.878495', 'step': 4598, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:18:22.910368', 'step': 4598, 'epoch': 1} {'type': 'loss', 'content': 0.14238910377025604, 'timestamp': '2025-10-01 04:18:22.912664', 'step': 4599, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:18:22.958802', 'step': 4599, 'epoch': 1} {'type': 'loss', 'content': 0.18608549237251282, 'timestamp': '2025-10-01 04:18:22.982032', 'step': 4600, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:23.034820', 'step': 4600, 'epoch': 1} {'type': 'loss', 'content': 0.2239447832107544, 'timestamp': '2025-10-01 04:18:23.036735', 'step': 4601, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:23.078047', 'step': 4601, 'epoch': 1} {'type': 'loss', 'content': 0.11232047528028488, 'timestamp': '2025-10-01 04:18:23.081516', 'step': 4602, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:18:23.113288', 'step': 4602, 'epoch': 1} {'type': 'loss', 'content': 0.2030051201581955, 'timestamp': '2025-10-01 04:18:23.115621', 'step': 4603, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:23.147516', 'step': 4603, 'epoch': 1} {'type': 'loss', 'content': 0.13980254530906677, 'timestamp': '2025-10-01 04:18:23.170618', 'step': 4604, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:23.223419', 'step': 4604, 'epoch': 1} {'type': 'loss', 'content': 0.14813192188739777, 'timestamp': '2025-10-01 04:18:23.225926', 'step': 4605, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:23.257445', 'step': 4605, 'epoch': 1} {'type': 'loss', 'content': 0.14892572164535522, 'timestamp': '2025-10-01 04:18:23.259481', 'step': 4606, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:23.293262', 'step': 4606, 'epoch': 1} {'type': 'loss', 'content': 0.12824532389640808, 'timestamp': '2025-10-01 04:18:23.295037', 'step': 4607, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:23.335602', 'step': 4607, 'epoch': 1} {'type': 'loss', 'content': 0.13737529516220093, 'timestamp': '2025-10-01 04:18:23.360953', 'step': 4608, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:23.392918', 'step': 4608, 'epoch': 1} {'type': 'loss', 'content': 0.13512562215328217, 'timestamp': '2025-10-01 04:18:23.394870', 'step': 4609, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:23.435556', 'step': 4609, 'epoch': 1} {'type': 'loss', 'content': 0.16400913894176483, 'timestamp': '2025-10-01 04:18:23.439378', 'step': 4610, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:23.471033', 'step': 4610, 'epoch': 1} {'type': 'loss', 'content': 0.16787831485271454, 'timestamp': '2025-10-01 04:18:23.472857', 'step': 4611, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:23.508387', 'step': 4611, 'epoch': 1} {'type': 'loss', 'content': 0.10515151917934418, 'timestamp': '2025-10-01 04:18:23.531841', 'step': 4612, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:23.578085', 'step': 4612, 'epoch': 1} {'type': 'loss', 'content': 0.28550460934638977, 'timestamp': '2025-10-01 04:18:23.579790', 'step': 4613, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:23.611772', 'step': 4613, 'epoch': 1} {'type': 'loss', 'content': 0.14404426515102386, 'timestamp': '2025-10-01 04:18:23.613367', 'step': 4614, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:18:23.656689', 'step': 4614, 'epoch': 1} {'type': 'loss', 'content': 0.1027304008603096, 'timestamp': '2025-10-01 04:18:23.658315', 'step': 4615, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:23.689291', 'step': 4615, 'epoch': 1} {'type': 'loss', 'content': 0.14658011496067047, 'timestamp': '2025-10-01 04:18:23.712622', 'step': 4616, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:18:23.743905', 'step': 4616, 'epoch': 1} {'type': 'loss', 'content': 0.11860370635986328, 'timestamp': '2025-10-01 04:18:23.749638', 'step': 4617, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:18:23.788172', 'step': 4617, 'epoch': 1} {'type': 'loss', 'content': 0.12277150899171829, 'timestamp': '2025-10-01 04:18:23.791538', 'step': 4618, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:18:23.823138', 'step': 4618, 'epoch': 1} {'type': 'loss', 'content': 0.149408221244812, 'timestamp': '2025-10-01 04:18:23.827179', 'step': 4619, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:23.868375', 'step': 4619, 'epoch': 1} {'type': 'loss', 'content': 0.2299174964427948, 'timestamp': '2025-10-01 04:18:23.893869', 'step': 4620, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:23.931937', 'step': 4620, 'epoch': 1} {'type': 'loss', 'content': 0.15692460536956787, 'timestamp': '2025-10-01 04:18:23.936879', 'step': 4621, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:18:23.968569', 'step': 4621, 'epoch': 1} {'type': 'loss', 'content': 0.1525355577468872, 'timestamp': '2025-10-01 04:18:23.970154', 'step': 4622, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:24.008681', 'step': 4622, 'epoch': 1} {'type': 'loss', 'content': 0.12395406514406204, 'timestamp': '2025-10-01 04:18:24.010310', 'step': 4623, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:24.053858', 'step': 4623, 'epoch': 1} {'type': 'loss', 'content': 0.16943156719207764, 'timestamp': '2025-10-01 04:18:24.077256', 'step': 4624, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:18:24.110682', 'step': 4624, 'epoch': 1} {'type': 'loss', 'content': 0.3813733756542206, 'timestamp': '2025-10-01 04:18:24.112401', 'step': 4625, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:24.151056', 'step': 4625, 'epoch': 1} {'type': 'loss', 'content': 0.22444692254066467, 'timestamp': '2025-10-01 04:18:24.152894', 'step': 4626, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:18:24.191471', 'step': 4626, 'epoch': 1} {'type': 'loss', 'content': 0.19631773233413696, 'timestamp': '2025-10-01 04:18:24.193802', 'step': 4627, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:24.233194', 'step': 4627, 'epoch': 1} {'type': 'loss', 'content': 0.07892028242349625, 'timestamp': '2025-10-01 04:18:24.256767', 'step': 4628, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:18:24.288883', 'step': 4628, 'epoch': 1} {'type': 'loss', 'content': 0.19743214547634125, 'timestamp': '2025-10-01 04:18:24.290741', 'step': 4629, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:24.329690', 'step': 4629, 'epoch': 1} {'type': 'loss', 'content': 0.15336768329143524, 'timestamp': '2025-10-01 04:18:24.332045', 'step': 4630, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:18:24.363856', 'step': 4630, 'epoch': 1} {'type': 'loss', 'content': 0.10784363746643066, 'timestamp': '2025-10-01 04:18:24.365191', 'step': 4631, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:18:24.403800', 'step': 4631, 'epoch': 1} {'type': 'loss', 'content': 0.16649290919303894, 'timestamp': '2025-10-01 04:18:24.428075', 'step': 4632, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:24.460096', 'step': 4632, 'epoch': 1} {'type': 'loss', 'content': 0.17486822605133057, 'timestamp': '2025-10-01 04:18:24.461877', 'step': 4633, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:18:24.504590', 'step': 4633, 'epoch': 1} {'type': 'loss', 'content': 0.1319427341222763, 'timestamp': '2025-10-01 04:18:24.506527', 'step': 4634, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:18:24.539566', 'step': 4634, 'epoch': 1} {'type': 'loss', 'content': 0.13906124234199524, 'timestamp': '2025-10-01 04:18:24.544122', 'step': 4635, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:24.577803', 'step': 4635, 'epoch': 1} {'type': 'loss', 'content': 0.12805253267288208, 'timestamp': '2025-10-01 04:18:24.600964', 'step': 4636, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:24.639310', 'step': 4636, 'epoch': 1} {'type': 'loss', 'content': 0.21425995230674744, 'timestamp': '2025-10-01 04:18:24.641001', 'step': 4637, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:18:24.676365', 'step': 4637, 'epoch': 1} {'type': 'loss', 'content': 0.18961429595947266, 'timestamp': '2025-10-01 04:18:24.679915', 'step': 4638, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:18:24.716062', 'step': 4638, 'epoch': 1} {'type': 'loss', 'content': 0.2479199767112732, 'timestamp': '2025-10-01 04:18:24.719813', 'step': 4639, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:24.752870', 'step': 4639, 'epoch': 1} {'type': 'loss', 'content': 0.2252425104379654, 'timestamp': '2025-10-01 04:18:24.776135', 'step': 4640, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:18:24.808518', 'step': 4640, 'epoch': 1} {'type': 'loss', 'content': 0.14686085283756256, 'timestamp': '2025-10-01 04:18:24.810218', 'step': 4641, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:18:24.842302', 'step': 4641, 'epoch': 1} {'type': 'loss', 'content': 0.15409965813159943, 'timestamp': '2025-10-01 04:18:24.845249', 'step': 4642, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:18:24.876305', 'step': 4642, 'epoch': 1} {'type': 'loss', 'content': 0.08740720897912979, 'timestamp': '2025-10-01 04:18:24.878000', 'step': 4643, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:24.909265', 'step': 4643, 'epoch': 1} {'type': 'loss', 'content': 0.2174876183271408, 'timestamp': '2025-10-01 04:18:24.932633', 'step': 4644, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:24.964147', 'step': 4644, 'epoch': 1} {'type': 'loss', 'content': 0.18195423483848572, 'timestamp': '2025-10-01 04:18:24.966123', 'step': 4645, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:24.997259', 'step': 4645, 'epoch': 1} {'type': 'loss', 'content': 0.1436905413866043, 'timestamp': '2025-10-01 04:18:24.998957', 'step': 4646, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:18:25.033679', 'step': 4646, 'epoch': 1} {'type': 'loss', 'content': 0.17300167679786682, 'timestamp': '2025-10-01 04:18:25.040072', 'step': 4647, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:18:25.071126', 'step': 4647, 'epoch': 1} {'type': 'loss', 'content': 0.13015329837799072, 'timestamp': '2025-10-01 04:18:25.098902', 'step': 4648, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:18:25.130480', 'step': 4648, 'epoch': 1} {'type': 'loss', 'content': 0.1823708713054657, 'timestamp': '2025-10-01 04:18:25.132132', 'step': 4649, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:25.163228', 'step': 4649, 'epoch': 1} {'type': 'loss', 'content': 0.17051257193088531, 'timestamp': '2025-10-01 04:18:25.165223', 'step': 4650, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:25.195683', 'step': 4650, 'epoch': 1} {'type': 'loss', 'content': 0.24689099192619324, 'timestamp': '2025-10-01 04:18:25.197396', 'step': 4651, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:25.228215', 'step': 4651, 'epoch': 1} {'type': 'loss', 'content': 0.1089915856719017, 'timestamp': '2025-10-01 04:18:25.251718', 'step': 4652, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:18:25.298258', 'step': 4652, 'epoch': 1} {'type': 'loss', 'content': 0.15674394369125366, 'timestamp': '2025-10-01 04:18:25.300337', 'step': 4653, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:25.339310', 'step': 4653, 'epoch': 1} {'type': 'loss', 'content': 0.2778947651386261, 'timestamp': '2025-10-01 04:18:25.341125', 'step': 4654, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:18:25.373875', 'step': 4654, 'epoch': 1} {'type': 'loss', 'content': 0.16043545305728912, 'timestamp': '2025-10-01 04:18:25.375739', 'step': 4655, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:18:25.413861', 'step': 4655, 'epoch': 1} {'type': 'loss', 'content': 0.1334228515625, 'timestamp': '2025-10-01 04:18:25.437749', 'step': 4656, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:25.469795', 'step': 4656, 'epoch': 1} {'type': 'loss', 'content': 0.18723034858703613, 'timestamp': '2025-10-01 04:18:25.471350', 'step': 4657, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:25.512851', 'step': 4657, 'epoch': 1} {'type': 'loss', 'content': 0.187142476439476, 'timestamp': '2025-10-01 04:18:25.514567', 'step': 4658, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:18:25.545339', 'step': 4658, 'epoch': 1} {'type': 'loss', 'content': 0.13792428374290466, 'timestamp': '2025-10-01 04:18:25.547770', 'step': 4659, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:18:25.585498', 'step': 4659, 'epoch': 1} {'type': 'loss', 'content': 0.22203579545021057, 'timestamp': '2025-10-01 04:18:25.608808', 'step': 4660, 'epoch': 1} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 949202279808}], 'timestamp': '2025-10-01 04:18:35.355475', 'step': 4660, 'epoch': 1} {'type': 'pplx', 'content': 9341.819285268068, 'timestamp': '2025-10-01 04:18:35.358437', 'step': 4660, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:35.390449', 'step': 4660, 'epoch': 1} {'type': 'loss', 'content': 0.13312342762947083, 'timestamp': '2025-10-01 04:18:35.392498', 'step': 4661, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:35.439160', 'step': 4661, 'epoch': 1} {'type': 'loss', 'content': 0.1720503717660904, 'timestamp': '2025-10-01 04:18:35.441127', 'step': 4662, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:18:35.487495', 'step': 4662, 'epoch': 1} {'type': 'loss', 'content': 0.14291201531887054, 'timestamp': '2025-10-01 04:18:35.489171', 'step': 4663, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:35.519866', 'step': 4663, 'epoch': 1} {'type': 'loss', 'content': 0.14278821647167206, 'timestamp': '2025-10-01 04:18:35.544054', 'step': 4664, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:35.576625', 'step': 4664, 'epoch': 1} {'type': 'loss', 'content': 0.10015594959259033, 'timestamp': '2025-10-01 04:18:35.578812', 'step': 4665, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:35.609855', 'step': 4665, 'epoch': 1} {'type': 'loss', 'content': 0.21248984336853027, 'timestamp': '2025-10-01 04:18:35.611775', 'step': 4666, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:18:35.643850', 'step': 4666, 'epoch': 1} {'type': 'loss', 'content': 0.12769237160682678, 'timestamp': '2025-10-01 04:18:35.650099', 'step': 4667, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:35.681203', 'step': 4667, 'epoch': 1} {'type': 'loss', 'content': 0.2254660725593567, 'timestamp': '2025-10-01 04:18:35.704610', 'step': 4668, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:18:35.735553', 'step': 4668, 'epoch': 1} {'type': 'loss', 'content': 0.1512560248374939, 'timestamp': '2025-10-01 04:18:35.737940', 'step': 4669, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:18:35.771398', 'step': 4669, 'epoch': 1} {'type': 'loss', 'content': 0.20387761294841766, 'timestamp': '2025-10-01 04:18:35.773786', 'step': 4670, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:35.805375', 'step': 4670, 'epoch': 1} {'type': 'loss', 'content': 0.18851974606513977, 'timestamp': '2025-10-01 04:18:35.807422', 'step': 4671, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:35.838967', 'step': 4671, 'epoch': 1} {'type': 'loss', 'content': 0.18148274719715118, 'timestamp': '2025-10-01 04:18:35.863134', 'step': 4672, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:18:35.898439', 'step': 4672, 'epoch': 1} {'type': 'loss', 'content': 0.14000459015369415, 'timestamp': '2025-10-01 04:18:35.900323', 'step': 4673, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:18:35.931304', 'step': 4673, 'epoch': 1} {'type': 'loss', 'content': 0.15305836498737335, 'timestamp': '2025-10-01 04:18:35.933217', 'step': 4674, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:18:35.966259', 'step': 4674, 'epoch': 1} {'type': 'loss', 'content': 0.18362510204315186, 'timestamp': '2025-10-01 04:18:35.968814', 'step': 4675, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:36.002474', 'step': 4675, 'epoch': 1} {'type': 'loss', 'content': 0.1757921427488327, 'timestamp': '2025-10-01 04:18:36.026087', 'step': 4676, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:36.058719', 'step': 4676, 'epoch': 1} {'type': 'loss', 'content': 0.21205201745033264, 'timestamp': '2025-10-01 04:18:36.060780', 'step': 4677, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:36.095243', 'step': 4677, 'epoch': 1} {'type': 'loss', 'content': 0.133427232503891, 'timestamp': '2025-10-01 04:18:36.097138', 'step': 4678, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:36.131202', 'step': 4678, 'epoch': 1} {'type': 'loss', 'content': 0.19472584128379822, 'timestamp': '2025-10-01 04:18:36.133210', 'step': 4679, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:18:36.164928', 'step': 4679, 'epoch': 1} {'type': 'loss', 'content': 0.16640545427799225, 'timestamp': '2025-10-01 04:18:36.188399', 'step': 4680, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:18:36.221497', 'step': 4680, 'epoch': 1} {'type': 'loss', 'content': 0.11442621797323227, 'timestamp': '2025-10-01 04:18:36.223400', 'step': 4681, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:36.255913', 'step': 4681, 'epoch': 1} {'type': 'loss', 'content': 0.1832011193037033, 'timestamp': '2025-10-01 04:18:36.257848', 'step': 4682, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:36.289384', 'step': 4682, 'epoch': 1} {'type': 'loss', 'content': 0.09194907546043396, 'timestamp': '2025-10-01 04:18:36.291129', 'step': 4683, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:36.324542', 'step': 4683, 'epoch': 1} {'type': 'loss', 'content': 0.22386039793491364, 'timestamp': '2025-10-01 04:18:36.348418', 'step': 4684, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:18:36.404933', 'step': 4684, 'epoch': 1} {'type': 'loss', 'content': 0.16394738852977753, 'timestamp': '2025-10-01 04:18:36.407767', 'step': 4685, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:36.439860', 'step': 4685, 'epoch': 1} {'type': 'loss', 'content': 0.18415780365467072, 'timestamp': '2025-10-01 04:18:36.442071', 'step': 4686, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:18:36.485690', 'step': 4686, 'epoch': 1} {'type': 'loss', 'content': 0.15025661885738373, 'timestamp': '2025-10-01 04:18:36.488001', 'step': 4687, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:36.520155', 'step': 4687, 'epoch': 1} {'type': 'loss', 'content': 0.10872479528188705, 'timestamp': '2025-10-01 04:18:36.543552', 'step': 4688, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:36.582943', 'step': 4688, 'epoch': 1} {'type': 'loss', 'content': 0.05199059471487999, 'timestamp': '2025-10-01 04:18:36.585205', 'step': 4689, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:18:36.617696', 'step': 4689, 'epoch': 1} {'type': 'loss', 'content': 0.09680584073066711, 'timestamp': '2025-10-01 04:18:36.620639', 'step': 4690, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:36.651673', 'step': 4690, 'epoch': 1} {'type': 'loss', 'content': 0.1714843064546585, 'timestamp': '2025-10-01 04:18:36.654570', 'step': 4691, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:36.685971', 'step': 4691, 'epoch': 1} {'type': 'loss', 'content': 0.09362725168466568, 'timestamp': '2025-10-01 04:18:36.709670', 'step': 4692, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:36.743258', 'step': 4692, 'epoch': 1} {'type': 'loss', 'content': 0.10651470720767975, 'timestamp': '2025-10-01 04:18:36.745237', 'step': 4693, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:36.777844', 'step': 4693, 'epoch': 1} {'type': 'loss', 'content': 0.17515091598033905, 'timestamp': '2025-10-01 04:18:36.790437', 'step': 4694, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:36.823903', 'step': 4694, 'epoch': 1} {'type': 'loss', 'content': 0.16021721065044403, 'timestamp': '2025-10-01 04:18:36.826354', 'step': 4695, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:18:36.860056', 'step': 4695, 'epoch': 1} {'type': 'loss', 'content': 0.14264178276062012, 'timestamp': '2025-10-01 04:18:36.883988', 'step': 4696, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:18:36.916270', 'step': 4696, 'epoch': 1} {'type': 'loss', 'content': 0.09126116335391998, 'timestamp': '2025-10-01 04:18:36.918511', 'step': 4697, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:18:36.950555', 'step': 4697, 'epoch': 1} {'type': 'loss', 'content': 0.23252083361148834, 'timestamp': '2025-10-01 04:18:36.952791', 'step': 4698, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:36.985075', 'step': 4698, 'epoch': 1} {'type': 'loss', 'content': 0.1449693888425827, 'timestamp': '2025-10-01 04:18:36.987046', 'step': 4699, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:37.021015', 'step': 4699, 'epoch': 1} {'type': 'loss', 'content': 0.24701142311096191, 'timestamp': '2025-10-01 04:18:37.044877', 'step': 4700, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:37.086887', 'step': 4700, 'epoch': 1} {'type': 'loss', 'content': 0.1499483287334442, 'timestamp': '2025-10-01 04:18:37.089523', 'step': 4701, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:37.121358', 'step': 4701, 'epoch': 1} {'type': 'loss', 'content': 0.12896034121513367, 'timestamp': '2025-10-01 04:18:37.124041', 'step': 4702, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:18:37.155290', 'step': 4702, 'epoch': 1} {'type': 'loss', 'content': 0.11152320355176926, 'timestamp': '2025-10-01 04:18:37.158129', 'step': 4703, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:18:37.189438', 'step': 4703, 'epoch': 1} {'type': 'loss', 'content': 0.19496802985668182, 'timestamp': '2025-10-01 04:18:37.213688', 'step': 4704, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:37.245033', 'step': 4704, 'epoch': 1} {'type': 'loss', 'content': 0.09547965228557587, 'timestamp': '2025-10-01 04:18:37.247887', 'step': 4705, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:18:37.281056', 'step': 4705, 'epoch': 1} {'type': 'loss', 'content': 0.1158415824174881, 'timestamp': '2025-10-01 04:18:37.285618', 'step': 4706, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:37.320160', 'step': 4706, 'epoch': 1} {'type': 'loss', 'content': 0.15196660161018372, 'timestamp': '2025-10-01 04:18:37.322210', 'step': 4707, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:37.353141', 'step': 4707, 'epoch': 1} {'type': 'loss', 'content': 0.14532765746116638, 'timestamp': '2025-10-01 04:18:37.376709', 'step': 4708, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:18:37.408923', 'step': 4708, 'epoch': 1} {'type': 'loss', 'content': 0.12353295832872391, 'timestamp': '2025-10-01 04:18:37.411204', 'step': 4709, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:37.443450', 'step': 4709, 'epoch': 1} {'type': 'loss', 'content': 0.142161026597023, 'timestamp': '2025-10-01 04:18:37.446978', 'step': 4710, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:18:37.478119', 'step': 4710, 'epoch': 1} {'type': 'loss', 'content': 0.16623930633068085, 'timestamp': '2025-10-01 04:18:37.480427', 'step': 4711, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:37.511939', 'step': 4711, 'epoch': 1} {'type': 'loss', 'content': 0.1487235277891159, 'timestamp': '2025-10-01 04:18:37.542783', 'step': 4712, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:37.598746', 'step': 4712, 'epoch': 1} {'type': 'loss', 'content': 0.173000305891037, 'timestamp': '2025-10-01 04:18:37.602104', 'step': 4713, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:18:37.635615', 'step': 4713, 'epoch': 1} {'type': 'loss', 'content': 0.39882051944732666, 'timestamp': '2025-10-01 04:18:37.639263', 'step': 4714, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:18:37.673480', 'step': 4714, 'epoch': 1} {'type': 'loss', 'content': 0.23799532651901245, 'timestamp': '2025-10-01 04:18:37.680895', 'step': 4715, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:37.712310', 'step': 4715, 'epoch': 1} {'type': 'loss', 'content': 0.16967199742794037, 'timestamp': '2025-10-01 04:18:37.740090', 'step': 4716, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:37.775533', 'step': 4716, 'epoch': 1} {'type': 'loss', 'content': 0.17035217583179474, 'timestamp': '2025-10-01 04:18:37.777929', 'step': 4717, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:37.809961', 'step': 4717, 'epoch': 1} {'type': 'loss', 'content': 0.11847854405641556, 'timestamp': '2025-10-01 04:18:37.811963', 'step': 4718, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:18:37.845807', 'step': 4718, 'epoch': 1} {'type': 'loss', 'content': 0.15250419080257416, 'timestamp': '2025-10-01 04:18:37.868864', 'step': 4719, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:37.908717', 'step': 4719, 'epoch': 1} {'type': 'loss', 'content': 0.0664965882897377, 'timestamp': '2025-10-01 04:18:37.933764', 'step': 4720, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:37.982223', 'step': 4720, 'epoch': 1} {'type': 'loss', 'content': 0.20484308898448944, 'timestamp': '2025-10-01 04:18:37.993760', 'step': 4721, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:18:38.024687', 'step': 4721, 'epoch': 1} {'type': 'loss', 'content': 0.1179187223315239, 'timestamp': '2025-10-01 04:18:38.030921', 'step': 4722, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:38.078309', 'step': 4722, 'epoch': 1} {'type': 'loss', 'content': 0.1660720556974411, 'timestamp': '2025-10-01 04:18:38.085215', 'step': 4723, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:38.122408', 'step': 4723, 'epoch': 1} {'type': 'loss', 'content': 0.12524084746837616, 'timestamp': '2025-10-01 04:18:38.155012', 'step': 4724, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:38.192144', 'step': 4724, 'epoch': 1} {'type': 'loss', 'content': 0.11237646639347076, 'timestamp': '2025-10-01 04:18:38.195571', 'step': 4725, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:38.230540', 'step': 4725, 'epoch': 1} {'type': 'loss', 'content': 0.28894034028053284, 'timestamp': '2025-10-01 04:18:38.232725', 'step': 4726, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:38.266298', 'step': 4726, 'epoch': 1} {'type': 'loss', 'content': 0.23846307396888733, 'timestamp': '2025-10-01 04:18:38.273082', 'step': 4727, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:18:38.311626', 'step': 4727, 'epoch': 1} {'type': 'loss', 'content': 0.2304934859275818, 'timestamp': '2025-10-01 04:18:38.344648', 'step': 4728, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:38.378138', 'step': 4728, 'epoch': 1} {'type': 'loss', 'content': 0.0669635608792305, 'timestamp': '2025-10-01 04:18:38.380995', 'step': 4729, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:38.418923', 'step': 4729, 'epoch': 1} {'type': 'loss', 'content': 0.11374710500240326, 'timestamp': '2025-10-01 04:18:38.423742', 'step': 4730, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:38.454531', 'step': 4730, 'epoch': 1} {'type': 'loss', 'content': 0.15418365597724915, 'timestamp': '2025-10-01 04:18:38.464176', 'step': 4731, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:38.516152', 'step': 4731, 'epoch': 1} {'type': 'loss', 'content': 0.17026379704475403, 'timestamp': '2025-10-01 04:18:38.552305', 'step': 4732, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:38.583689', 'step': 4732, 'epoch': 1} {'type': 'loss', 'content': 0.14820946753025055, 'timestamp': '2025-10-01 04:18:38.589227', 'step': 4733, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:38.619711', 'step': 4733, 'epoch': 1} {'type': 'loss', 'content': 0.14336015284061432, 'timestamp': '2025-10-01 04:18:38.630248', 'step': 4734, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:18:38.667233', 'step': 4734, 'epoch': 1} {'type': 'loss', 'content': 0.24223682284355164, 'timestamp': '2025-10-01 04:18:38.670033', 'step': 4735, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:38.720864', 'step': 4735, 'epoch': 1} {'type': 'loss', 'content': 0.1552472859621048, 'timestamp': '2025-10-01 04:18:38.753234', 'step': 4736, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:38.789251', 'step': 4736, 'epoch': 1} {'type': 'loss', 'content': 0.10466600954532623, 'timestamp': '2025-10-01 04:18:38.793865', 'step': 4737, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:18:38.832133', 'step': 4737, 'epoch': 1} {'type': 'loss', 'content': 0.16259004175662994, 'timestamp': '2025-10-01 04:18:38.836498', 'step': 4738, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:38.871142', 'step': 4738, 'epoch': 1} {'type': 'loss', 'content': 0.17491847276687622, 'timestamp': '2025-10-01 04:18:38.878573', 'step': 4739, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:38.928883', 'step': 4739, 'epoch': 1} {'type': 'loss', 'content': 0.11824887245893478, 'timestamp': '2025-10-01 04:18:38.960619', 'step': 4740, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:38.994443', 'step': 4740, 'epoch': 1} {'type': 'loss', 'content': 0.21123309433460236, 'timestamp': '2025-10-01 04:18:38.999953', 'step': 4741, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:39.041531', 'step': 4741, 'epoch': 1} {'type': 'loss', 'content': 0.10759981721639633, 'timestamp': '2025-10-01 04:18:39.043915', 'step': 4742, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:39.087354', 'step': 4742, 'epoch': 1} {'type': 'loss', 'content': 0.16692255437374115, 'timestamp': '2025-10-01 04:18:39.096741', 'step': 4743, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:18:39.128863', 'step': 4743, 'epoch': 1} {'type': 'loss', 'content': 0.09400536119937897, 'timestamp': '2025-10-01 04:18:39.154491', 'step': 4744, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:18:39.187879', 'step': 4744, 'epoch': 1} {'type': 'loss', 'content': 0.08127850294113159, 'timestamp': '2025-10-01 04:18:39.189925', 'step': 4745, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:39.228316', 'step': 4745, 'epoch': 1} {'type': 'loss', 'content': 0.2587442398071289, 'timestamp': '2025-10-01 04:18:39.230379', 'step': 4746, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:39.261538', 'step': 4746, 'epoch': 1} {'type': 'loss', 'content': 0.3080136775970459, 'timestamp': '2025-10-01 04:18:39.263601', 'step': 4747, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:39.294620', 'step': 4747, 'epoch': 1} {'type': 'loss', 'content': 0.10815359652042389, 'timestamp': '2025-10-01 04:18:39.318708', 'step': 4748, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:18:39.362888', 'step': 4748, 'epoch': 1} {'type': 'loss', 'content': 0.19220894575119019, 'timestamp': '2025-10-01 04:18:39.364808', 'step': 4749, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:39.397856', 'step': 4749, 'epoch': 1} {'type': 'loss', 'content': 0.1270178109407425, 'timestamp': '2025-10-01 04:18:39.399789', 'step': 4750, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:18:39.431889', 'step': 4750, 'epoch': 1} {'type': 'loss', 'content': 0.2220567762851715, 'timestamp': '2025-10-01 04:18:39.433739', 'step': 4751, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:18:39.467972', 'step': 4751, 'epoch': 1} {'type': 'loss', 'content': 0.17553335428237915, 'timestamp': '2025-10-01 04:18:39.491649', 'step': 4752, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:39.535448', 'step': 4752, 'epoch': 1} {'type': 'loss', 'content': 0.23771190643310547, 'timestamp': '2025-10-01 04:18:39.537420', 'step': 4753, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:39.572904', 'step': 4753, 'epoch': 1} {'type': 'loss', 'content': 0.19208382070064545, 'timestamp': '2025-10-01 04:18:39.575089', 'step': 4754, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:39.609617', 'step': 4754, 'epoch': 1} {'type': 'loss', 'content': 0.17702078819274902, 'timestamp': '2025-10-01 04:18:39.611427', 'step': 4755, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:39.645979', 'step': 4755, 'epoch': 1} {'type': 'loss', 'content': 0.18999837338924408, 'timestamp': '2025-10-01 04:18:39.669562', 'step': 4756, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:39.710545', 'step': 4756, 'epoch': 1} {'type': 'loss', 'content': 0.16408494114875793, 'timestamp': '2025-10-01 04:18:39.712371', 'step': 4757, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:18:39.744399', 'step': 4757, 'epoch': 1} {'type': 'loss', 'content': 0.10642115771770477, 'timestamp': '2025-10-01 04:18:39.749873', 'step': 4758, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:18:39.801286', 'step': 4758, 'epoch': 1} {'type': 'loss', 'content': 0.17857028543949127, 'timestamp': '2025-10-01 04:18:39.803776', 'step': 4759, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:39.837464', 'step': 4759, 'epoch': 1} {'type': 'loss', 'content': 0.18499678373336792, 'timestamp': '2025-10-01 04:18:39.860956', 'step': 4760, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:39.900812', 'step': 4760, 'epoch': 1} {'type': 'loss', 'content': 0.20427055656909943, 'timestamp': '2025-10-01 04:18:39.908416', 'step': 4761, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:39.947317', 'step': 4761, 'epoch': 1} {'type': 'loss', 'content': 0.056634172797203064, 'timestamp': '2025-10-01 04:18:39.949509', 'step': 4762, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:39.988682', 'step': 4762, 'epoch': 1} {'type': 'loss', 'content': 0.06361648440361023, 'timestamp': '2025-10-01 04:18:39.990739', 'step': 4763, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:18:40.035505', 'step': 4763, 'epoch': 1} {'type': 'loss', 'content': 0.12073491513729095, 'timestamp': '2025-10-01 04:18:40.058925', 'step': 4764, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:40.093265', 'step': 4764, 'epoch': 1} {'type': 'loss', 'content': 0.16517093777656555, 'timestamp': '2025-10-01 04:18:40.095480', 'step': 4765, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:18:40.142587', 'step': 4765, 'epoch': 1} {'type': 'loss', 'content': 0.2867647111415863, 'timestamp': '2025-10-01 04:18:40.148804', 'step': 4766, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:40.184946', 'step': 4766, 'epoch': 1} {'type': 'loss', 'content': 0.2308425009250641, 'timestamp': '2025-10-01 04:18:40.187001', 'step': 4767, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:18:40.242971', 'step': 4767, 'epoch': 1} {'type': 'loss', 'content': 0.24658404290676117, 'timestamp': '2025-10-01 04:18:40.266453', 'step': 4768, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:40.314226', 'step': 4768, 'epoch': 1} {'type': 'loss', 'content': 0.14477907121181488, 'timestamp': '2025-10-01 04:18:40.320238', 'step': 4769, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:40.362699', 'step': 4769, 'epoch': 1} {'type': 'loss', 'content': 0.21271193027496338, 'timestamp': '2025-10-01 04:18:40.364677', 'step': 4770, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:40.404565', 'step': 4770, 'epoch': 1} {'type': 'loss', 'content': 0.11363150179386139, 'timestamp': '2025-10-01 04:18:40.407645', 'step': 4771, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:40.438857', 'step': 4771, 'epoch': 1} {'type': 'loss', 'content': 0.09451606869697571, 'timestamp': '2025-10-01 04:18:40.462345', 'step': 4772, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:40.497159', 'step': 4772, 'epoch': 1} {'type': 'loss', 'content': 0.17937268316745758, 'timestamp': '2025-10-01 04:18:40.499070', 'step': 4773, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:18:40.541340', 'step': 4773, 'epoch': 1} {'type': 'loss', 'content': 0.21946413815021515, 'timestamp': '2025-10-01 04:18:40.543527', 'step': 4774, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:18:40.585368', 'step': 4774, 'epoch': 1} {'type': 'loss', 'content': 0.19213519990444183, 'timestamp': '2025-10-01 04:18:40.587403', 'step': 4775, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:40.620161', 'step': 4775, 'epoch': 1} {'type': 'loss', 'content': 0.0848584696650505, 'timestamp': '2025-10-01 04:18:40.643563', 'step': 4776, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:18:40.697324', 'step': 4776, 'epoch': 1} {'type': 'loss', 'content': 0.13834840059280396, 'timestamp': '2025-10-01 04:18:40.699296', 'step': 4777, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:40.732996', 'step': 4777, 'epoch': 1} {'type': 'loss', 'content': 0.12208180129528046, 'timestamp': '2025-10-01 04:18:40.739377', 'step': 4778, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:40.772714', 'step': 4778, 'epoch': 1} {'type': 'loss', 'content': 0.1537257730960846, 'timestamp': '2025-10-01 04:18:40.774513', 'step': 4779, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:40.807099', 'step': 4779, 'epoch': 1} {'type': 'loss', 'content': 0.16411961615085602, 'timestamp': '2025-10-01 04:18:40.834683', 'step': 4780, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:18:40.868358', 'step': 4780, 'epoch': 1} {'type': 'loss', 'content': 0.26724672317504883, 'timestamp': '2025-10-01 04:18:40.870505', 'step': 4781, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:40.904143', 'step': 4781, 'epoch': 1} {'type': 'loss', 'content': 0.22922907769680023, 'timestamp': '2025-10-01 04:18:40.906346', 'step': 4782, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:40.942017', 'step': 4782, 'epoch': 1} {'type': 'loss', 'content': 0.10372205823659897, 'timestamp': '2025-10-01 04:18:40.944043', 'step': 4783, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:40.979309', 'step': 4783, 'epoch': 1} {'type': 'loss', 'content': 0.2502685487270355, 'timestamp': '2025-10-01 04:18:41.005605', 'step': 4784, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:41.036690', 'step': 4784, 'epoch': 1} {'type': 'loss', 'content': 0.13191063702106476, 'timestamp': '2025-10-01 04:18:41.038647', 'step': 4785, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:41.071166', 'step': 4785, 'epoch': 1} {'type': 'loss', 'content': 0.18240955471992493, 'timestamp': '2025-10-01 04:18:41.073097', 'step': 4786, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:41.108219', 'step': 4786, 'epoch': 1} {'type': 'loss', 'content': 0.12096966058015823, 'timestamp': '2025-10-01 04:18:41.110979', 'step': 4787, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:41.160581', 'step': 4787, 'epoch': 1} {'type': 'loss', 'content': 0.13758079707622528, 'timestamp': '2025-10-01 04:18:41.186875', 'step': 4788, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:18:41.240787', 'step': 4788, 'epoch': 1} {'type': 'loss', 'content': 0.13952386379241943, 'timestamp': '2025-10-01 04:18:41.245240', 'step': 4789, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:41.304533', 'step': 4789, 'epoch': 1} {'type': 'loss', 'content': 0.16986599564552307, 'timestamp': '2025-10-01 04:18:41.306613', 'step': 4790, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:41.349803', 'step': 4790, 'epoch': 1} {'type': 'loss', 'content': 0.1432780623435974, 'timestamp': '2025-10-01 04:18:41.361379', 'step': 4791, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:18:41.406845', 'step': 4791, 'epoch': 1} {'type': 'loss', 'content': 0.2720799744129181, 'timestamp': '2025-10-01 04:18:41.430247', 'step': 4792, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:41.504691', 'step': 4792, 'epoch': 1} {'type': 'loss', 'content': 0.2731419503688812, 'timestamp': '2025-10-01 04:18:41.506595', 'step': 4793, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:41.549618', 'step': 4793, 'epoch': 1} {'type': 'loss', 'content': 0.1526866853237152, 'timestamp': '2025-10-01 04:18:41.551913', 'step': 4794, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:18:41.598164', 'step': 4794, 'epoch': 1} {'type': 'loss', 'content': 0.17476239800453186, 'timestamp': '2025-10-01 04:18:41.600659', 'step': 4795, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:41.634871', 'step': 4795, 'epoch': 1} {'type': 'loss', 'content': 0.11822862923145294, 'timestamp': '2025-10-01 04:18:41.660778', 'step': 4796, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:18:41.706555', 'step': 4796, 'epoch': 1} {'type': 'loss', 'content': 0.18580879271030426, 'timestamp': '2025-10-01 04:18:41.708640', 'step': 4797, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:41.739854', 'step': 4797, 'epoch': 1} {'type': 'loss', 'content': 0.15028958022594452, 'timestamp': '2025-10-01 04:18:41.741957', 'step': 4798, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:41.841547', 'step': 4798, 'epoch': 1} {'type': 'loss', 'content': 0.158419668674469, 'timestamp': '2025-10-01 04:18:41.843493', 'step': 4799, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:41.887590', 'step': 4799, 'epoch': 1} {'type': 'loss', 'content': 0.1470707654953003, 'timestamp': '2025-10-01 04:18:41.911010', 'step': 4800, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:18:41.950372', 'step': 4800, 'epoch': 1} {'type': 'loss', 'content': 0.13568684458732605, 'timestamp': '2025-10-01 04:18:41.952346', 'step': 4801, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:42.011918', 'step': 4801, 'epoch': 1} {'type': 'loss', 'content': 0.17407651245594025, 'timestamp': '2025-10-01 04:18:42.013910', 'step': 4802, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:42.052933', 'step': 4802, 'epoch': 1} {'type': 'loss', 'content': 0.16323019564151764, 'timestamp': '2025-10-01 04:18:42.054891', 'step': 4803, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:18:42.112753', 'step': 4803, 'epoch': 1} {'type': 'loss', 'content': 0.18482981622219086, 'timestamp': '2025-10-01 04:18:42.140385', 'step': 4804, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:42.181007', 'step': 4804, 'epoch': 1} {'type': 'loss', 'content': 0.29898735880851746, 'timestamp': '2025-10-01 04:18:42.183000', 'step': 4805, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:42.245750', 'step': 4805, 'epoch': 1} {'type': 'loss', 'content': 0.17337921261787415, 'timestamp': '2025-10-01 04:18:42.247875', 'step': 4806, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:42.297341', 'step': 4806, 'epoch': 1} {'type': 'loss', 'content': 0.08978626877069473, 'timestamp': '2025-10-01 04:18:42.304746', 'step': 4807, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:18:42.341877', 'step': 4807, 'epoch': 1} {'type': 'loss', 'content': 0.16018080711364746, 'timestamp': '2025-10-01 04:18:42.365547', 'step': 4808, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:18:42.402223', 'step': 4808, 'epoch': 1} {'type': 'loss', 'content': 0.1678725928068161, 'timestamp': '2025-10-01 04:18:42.404138', 'step': 4809, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:18:42.436339', 'step': 4809, 'epoch': 1} {'type': 'loss', 'content': 0.19401781260967255, 'timestamp': '2025-10-01 04:18:42.439259', 'step': 4810, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:42.482015', 'step': 4810, 'epoch': 1} {'type': 'loss', 'content': 0.12195269018411636, 'timestamp': '2025-10-01 04:18:42.483756', 'step': 4811, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:42.542231', 'step': 4811, 'epoch': 1} {'type': 'loss', 'content': 0.19620837271213531, 'timestamp': '2025-10-01 04:18:42.570394', 'step': 4812, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:42.619006', 'step': 4812, 'epoch': 1} {'type': 'loss', 'content': 0.2525063455104828, 'timestamp': '2025-10-01 04:18:42.620989', 'step': 4813, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:42.653559', 'step': 4813, 'epoch': 1} {'type': 'loss', 'content': 0.24744124710559845, 'timestamp': '2025-10-01 04:18:42.655499', 'step': 4814, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:42.697586', 'step': 4814, 'epoch': 1} {'type': 'loss', 'content': 0.0860435888171196, 'timestamp': '2025-10-01 04:18:42.699424', 'step': 4815, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:42.731381', 'step': 4815, 'epoch': 1} {'type': 'loss', 'content': 0.16726528108119965, 'timestamp': '2025-10-01 04:18:42.754999', 'step': 4816, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:42.796420', 'step': 4816, 'epoch': 1} {'type': 'loss', 'content': 0.15923580527305603, 'timestamp': '2025-10-01 04:18:42.798350', 'step': 4817, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:42.830295', 'step': 4817, 'epoch': 1} {'type': 'loss', 'content': 0.1209912896156311, 'timestamp': '2025-10-01 04:18:42.832379', 'step': 4818, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:42.864928', 'step': 4818, 'epoch': 1} {'type': 'loss', 'content': 0.10099226981401443, 'timestamp': '2025-10-01 04:18:42.867006', 'step': 4819, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:18:42.899680', 'step': 4819, 'epoch': 1} {'type': 'loss', 'content': 0.12242013216018677, 'timestamp': '2025-10-01 04:18:42.932558', 'step': 4820, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:42.967064', 'step': 4820, 'epoch': 1} {'type': 'loss', 'content': 0.15168294310569763, 'timestamp': '2025-10-01 04:18:42.977544', 'step': 4821, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:43.015855', 'step': 4821, 'epoch': 1} {'type': 'loss', 'content': 0.18758180737495422, 'timestamp': '2025-10-01 04:18:43.017778', 'step': 4822, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:18:43.051116', 'step': 4822, 'epoch': 1} {'type': 'loss', 'content': 0.06352823972702026, 'timestamp': '2025-10-01 04:18:43.053896', 'step': 4823, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:18:43.098072', 'step': 4823, 'epoch': 1} {'type': 'loss', 'content': 0.09963279217481613, 'timestamp': '2025-10-01 04:18:43.121563', 'step': 4824, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:43.167701', 'step': 4824, 'epoch': 1} {'type': 'loss', 'content': 0.10390922427177429, 'timestamp': '2025-10-01 04:18:43.169908', 'step': 4825, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:43.208370', 'step': 4825, 'epoch': 1} {'type': 'loss', 'content': 0.08544784784317017, 'timestamp': '2025-10-01 04:18:43.210537', 'step': 4826, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:43.250023', 'step': 4826, 'epoch': 1} {'type': 'loss', 'content': 0.07489249110221863, 'timestamp': '2025-10-01 04:18:43.252154', 'step': 4827, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:43.299037', 'step': 4827, 'epoch': 1} {'type': 'loss', 'content': 0.10666165500879288, 'timestamp': '2025-10-01 04:18:43.322492', 'step': 4828, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:18:43.363115', 'step': 4828, 'epoch': 1} {'type': 'loss', 'content': 0.15049052238464355, 'timestamp': '2025-10-01 04:18:43.365136', 'step': 4829, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:43.401982', 'step': 4829, 'epoch': 1} {'type': 'loss', 'content': 0.1424160748720169, 'timestamp': '2025-10-01 04:18:43.403869', 'step': 4830, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:18:43.443994', 'step': 4830, 'epoch': 1} {'type': 'loss', 'content': 0.1296379119157791, 'timestamp': '2025-10-01 04:18:43.446516', 'step': 4831, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:43.489506', 'step': 4831, 'epoch': 1} {'type': 'loss', 'content': 0.20906241238117218, 'timestamp': '2025-10-01 04:18:43.513089', 'step': 4832, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:18:43.547366', 'step': 4832, 'epoch': 1} {'type': 'loss', 'content': 0.0931326374411583, 'timestamp': '2025-10-01 04:18:43.550808', 'step': 4833, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:18:43.587730', 'step': 4833, 'epoch': 1} {'type': 'loss', 'content': 0.1472889930009842, 'timestamp': '2025-10-01 04:18:43.591011', 'step': 4834, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:18:43.623219', 'step': 4834, 'epoch': 1} {'type': 'loss', 'content': 0.09904909878969193, 'timestamp': '2025-10-01 04:18:43.625151', 'step': 4835, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:43.660508', 'step': 4835, 'epoch': 1} {'type': 'loss', 'content': 0.1326197385787964, 'timestamp': '2025-10-01 04:18:43.683809', 'step': 4836, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:43.722518', 'step': 4836, 'epoch': 1} {'type': 'loss', 'content': 0.06791282445192337, 'timestamp': '2025-10-01 04:18:43.724503', 'step': 4837, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:43.764500', 'step': 4837, 'epoch': 1} {'type': 'loss', 'content': 0.18203848600387573, 'timestamp': '2025-10-01 04:18:43.766533', 'step': 4838, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:43.807582', 'step': 4838, 'epoch': 1} {'type': 'loss', 'content': 0.24550028145313263, 'timestamp': '2025-10-01 04:18:43.809519', 'step': 4839, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:43.842601', 'step': 4839, 'epoch': 1} {'type': 'loss', 'content': 0.13454937934875488, 'timestamp': '2025-10-01 04:18:43.866081', 'step': 4840, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:18:43.907910', 'step': 4840, 'epoch': 1} {'type': 'loss', 'content': 0.13805823028087616, 'timestamp': '2025-10-01 04:18:43.914101', 'step': 4841, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:18:43.948280', 'step': 4841, 'epoch': 1} {'type': 'loss', 'content': 0.1390257030725479, 'timestamp': '2025-10-01 04:18:43.961865', 'step': 4842, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:18:43.996416', 'step': 4842, 'epoch': 1} {'type': 'loss', 'content': 0.1989295482635498, 'timestamp': '2025-10-01 04:18:43.998904', 'step': 4843, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:18:44.033440', 'step': 4843, 'epoch': 1} {'type': 'loss', 'content': 0.12673956155776978, 'timestamp': '2025-10-01 04:18:44.056991', 'step': 4844, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:18:44.088178', 'step': 4844, 'epoch': 1} {'type': 'loss', 'content': 0.17383797466754913, 'timestamp': '2025-10-01 04:18:44.096636', 'step': 4845, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:18:44.131619', 'step': 4845, 'epoch': 1} {'type': 'loss', 'content': 0.06773772090673447, 'timestamp': '2025-10-01 04:18:44.133785', 'step': 4846, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:44.174290', 'step': 4846, 'epoch': 1} {'type': 'loss', 'content': 0.15487581491470337, 'timestamp': '2025-10-01 04:18:44.177587', 'step': 4847, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:18:44.209510', 'step': 4847, 'epoch': 1} {'type': 'loss', 'content': 0.16573412716388702, 'timestamp': '2025-10-01 04:18:44.233168', 'step': 4848, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:44.280287', 'step': 4848, 'epoch': 1} {'type': 'loss', 'content': 0.2887081205844879, 'timestamp': '2025-10-01 04:18:44.282564', 'step': 4849, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:44.329168', 'step': 4849, 'epoch': 1} {'type': 'loss', 'content': 0.13013491034507751, 'timestamp': '2025-10-01 04:18:44.331354', 'step': 4850, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:44.364152', 'step': 4850, 'epoch': 1} {'type': 'loss', 'content': 0.13133183121681213, 'timestamp': '2025-10-01 04:18:44.366202', 'step': 4851, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:18:44.397461', 'step': 4851, 'epoch': 1} {'type': 'loss', 'content': 0.18756797909736633, 'timestamp': '2025-10-01 04:18:44.422987', 'step': 4852, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:44.455168', 'step': 4852, 'epoch': 1} {'type': 'loss', 'content': 0.14040718972682953, 'timestamp': '2025-10-01 04:18:44.457565', 'step': 4853, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:44.488795', 'step': 4853, 'epoch': 1} {'type': 'loss', 'content': 0.15068598091602325, 'timestamp': '2025-10-01 04:18:44.491208', 'step': 4854, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:44.531010', 'step': 4854, 'epoch': 1} {'type': 'loss', 'content': 0.16080133616924286, 'timestamp': '2025-10-01 04:18:44.533778', 'step': 4855, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:18:44.565027', 'step': 4855, 'epoch': 1} {'type': 'loss', 'content': 0.14845910668373108, 'timestamp': '2025-10-01 04:18:44.589438', 'step': 4856, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:18:44.621545', 'step': 4856, 'epoch': 1} {'type': 'loss', 'content': 0.06426084041595459, 'timestamp': '2025-10-01 04:18:44.624023', 'step': 4857, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:18:44.658203', 'step': 4857, 'epoch': 1} {'type': 'loss', 'content': 0.307910680770874, 'timestamp': '2025-10-01 04:18:44.660800', 'step': 4858, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:44.696564', 'step': 4858, 'epoch': 1} {'type': 'loss', 'content': 0.19097958505153656, 'timestamp': '2025-10-01 04:18:44.699961', 'step': 4859, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:44.740194', 'step': 4859, 'epoch': 1} {'type': 'loss', 'content': 0.10442575812339783, 'timestamp': '2025-10-01 04:18:44.763667', 'step': 4860, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:44.801847', 'step': 4860, 'epoch': 1} {'type': 'loss', 'content': 0.20614144206047058, 'timestamp': '2025-10-01 04:18:44.805829', 'step': 4861, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:44.845178', 'step': 4861, 'epoch': 1} {'type': 'loss', 'content': 0.13994379341602325, 'timestamp': '2025-10-01 04:18:44.847841', 'step': 4862, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:18:44.881333', 'step': 4862, 'epoch': 1} {'type': 'loss', 'content': 0.28378915786743164, 'timestamp': '2025-10-01 04:18:44.886758', 'step': 4863, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:18:44.920149', 'step': 4863, 'epoch': 1} {'type': 'loss', 'content': 0.09622669965028763, 'timestamp': '2025-10-01 04:18:44.943975', 'step': 4864, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:18:44.977110', 'step': 4864, 'epoch': 1} {'type': 'loss', 'content': 0.14468318223953247, 'timestamp': '2025-10-01 04:18:44.980612', 'step': 4865, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:18:45.013684', 'step': 4865, 'epoch': 1} {'type': 'loss', 'content': 0.19977550208568573, 'timestamp': '2025-10-01 04:18:45.017934', 'step': 4866, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:45.048629', 'step': 4866, 'epoch': 1} {'type': 'loss', 'content': 0.186708465218544, 'timestamp': '2025-10-01 04:18:45.050782', 'step': 4867, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:18:45.082697', 'step': 4867, 'epoch': 1} {'type': 'loss', 'content': 0.19759894907474518, 'timestamp': '2025-10-01 04:18:45.106064', 'step': 4868, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:45.146541', 'step': 4868, 'epoch': 1} {'type': 'loss', 'content': 0.34047508239746094, 'timestamp': '2025-10-01 04:18:45.148748', 'step': 4869, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:45.182002', 'step': 4869, 'epoch': 1} {'type': 'loss', 'content': 0.20913901925086975, 'timestamp': '2025-10-01 04:18:45.183905', 'step': 4870, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:45.218225', 'step': 4870, 'epoch': 1} {'type': 'loss', 'content': 0.14173941314220428, 'timestamp': '2025-10-01 04:18:45.220271', 'step': 4871, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:45.253872', 'step': 4871, 'epoch': 1} {'type': 'loss', 'content': 0.1075902134180069, 'timestamp': '2025-10-01 04:18:45.278335', 'step': 4872, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:45.322209', 'step': 4872, 'epoch': 1} {'type': 'loss', 'content': 0.11453083902597427, 'timestamp': '2025-10-01 04:18:45.324318', 'step': 4873, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:45.356993', 'step': 4873, 'epoch': 1} {'type': 'loss', 'content': 0.18257512152194977, 'timestamp': '2025-10-01 04:18:45.358882', 'step': 4874, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:45.390628', 'step': 4874, 'epoch': 1} {'type': 'loss', 'content': 0.1829705834388733, 'timestamp': '2025-10-01 04:18:45.393566', 'step': 4875, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:45.429373', 'step': 4875, 'epoch': 1} {'type': 'loss', 'content': 0.1500864326953888, 'timestamp': '2025-10-01 04:18:45.453607', 'step': 4876, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:18:45.493223', 'step': 4876, 'epoch': 1} {'type': 'loss', 'content': 0.18044516444206238, 'timestamp': '2025-10-01 04:18:45.495162', 'step': 4877, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:18:45.530893', 'step': 4877, 'epoch': 1} {'type': 'loss', 'content': 0.09204276651144028, 'timestamp': '2025-10-01 04:18:45.533337', 'step': 4878, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:45.567107', 'step': 4878, 'epoch': 1} {'type': 'loss', 'content': 0.2584507167339325, 'timestamp': '2025-10-01 04:18:45.569109', 'step': 4879, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:45.610590', 'step': 4879, 'epoch': 1} {'type': 'loss', 'content': 0.13333554565906525, 'timestamp': '2025-10-01 04:18:45.634003', 'step': 4880, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:45.665046', 'step': 4880, 'epoch': 1} {'type': 'loss', 'content': 0.20975567400455475, 'timestamp': '2025-10-01 04:18:45.667150', 'step': 4881, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:45.697852', 'step': 4881, 'epoch': 1} {'type': 'loss', 'content': 0.17402958869934082, 'timestamp': '2025-10-01 04:18:45.699807', 'step': 4882, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:18:45.736361', 'step': 4882, 'epoch': 1} {'type': 'loss', 'content': 0.10714586824178696, 'timestamp': '2025-10-01 04:18:45.738888', 'step': 4883, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:18:45.770056', 'step': 4883, 'epoch': 1} {'type': 'loss', 'content': 0.1683695763349533, 'timestamp': '2025-10-01 04:18:45.794024', 'step': 4884, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:18:45.828603', 'step': 4884, 'epoch': 1} {'type': 'loss', 'content': 0.1515721082687378, 'timestamp': '2025-10-01 04:18:45.830885', 'step': 4885, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:45.865151', 'step': 4885, 'epoch': 1} {'type': 'loss', 'content': 0.19879455864429474, 'timestamp': '2025-10-01 04:18:45.868067', 'step': 4886, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:45.906289', 'step': 4886, 'epoch': 1} {'type': 'loss', 'content': 0.17548280954360962, 'timestamp': '2025-10-01 04:18:45.910129', 'step': 4887, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:45.941748', 'step': 4887, 'epoch': 1} {'type': 'loss', 'content': 0.13573849201202393, 'timestamp': '2025-10-01 04:18:45.972567', 'step': 4888, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:46.006901', 'step': 4888, 'epoch': 1} {'type': 'loss', 'content': 0.18402662873268127, 'timestamp': '2025-10-01 04:18:46.010777', 'step': 4889, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:46.050446', 'step': 4889, 'epoch': 1} {'type': 'loss', 'content': 0.20082536339759827, 'timestamp': '2025-10-01 04:18:46.053397', 'step': 4890, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:18:46.085740', 'step': 4890, 'epoch': 1} {'type': 'loss', 'content': 0.16144585609436035, 'timestamp': '2025-10-01 04:18:46.088484', 'step': 4891, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:46.121823', 'step': 4891, 'epoch': 1} {'type': 'loss', 'content': 0.13125833868980408, 'timestamp': '2025-10-01 04:18:46.145558', 'step': 4892, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:46.178916', 'step': 4892, 'epoch': 1} {'type': 'loss', 'content': 0.2195776104927063, 'timestamp': '2025-10-01 04:18:46.181784', 'step': 4893, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:46.213066', 'step': 4893, 'epoch': 1} {'type': 'loss', 'content': 0.1855868697166443, 'timestamp': '2025-10-01 04:18:46.215425', 'step': 4894, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:18:46.251195', 'step': 4894, 'epoch': 1} {'type': 'loss', 'content': 0.2704090178012848, 'timestamp': '2025-10-01 04:18:46.253518', 'step': 4895, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:46.289480', 'step': 4895, 'epoch': 1} {'type': 'loss', 'content': 0.1139594241976738, 'timestamp': '2025-10-01 04:18:46.313007', 'step': 4896, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:18:46.344533', 'step': 4896, 'epoch': 1} {'type': 'loss', 'content': 0.17871546745300293, 'timestamp': '2025-10-01 04:18:46.346594', 'step': 4897, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:46.382666', 'step': 4897, 'epoch': 1} {'type': 'loss', 'content': 0.09063083678483963, 'timestamp': '2025-10-01 04:18:46.384574', 'step': 4898, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:46.421124', 'step': 4898, 'epoch': 1} {'type': 'loss', 'content': 0.17607831954956055, 'timestamp': '2025-10-01 04:18:46.423493', 'step': 4899, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:18:46.461004', 'step': 4899, 'epoch': 1} {'type': 'loss', 'content': 0.19210641086101532, 'timestamp': '2025-10-01 04:18:46.485670', 'step': 4900, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:46.521978', 'step': 4900, 'epoch': 1} {'type': 'loss', 'content': 0.12233705073595047, 'timestamp': '2025-10-01 04:18:46.532345', 'step': 4901, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:18:46.567533', 'step': 4901, 'epoch': 1} {'type': 'loss', 'content': 0.16406115889549255, 'timestamp': '2025-10-01 04:18:46.570247', 'step': 4902, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:46.601710', 'step': 4902, 'epoch': 1} {'type': 'loss', 'content': 0.18621709942817688, 'timestamp': '2025-10-01 04:18:46.611710', 'step': 4903, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:46.648434', 'step': 4903, 'epoch': 1} {'type': 'loss', 'content': 0.12496580183506012, 'timestamp': '2025-10-01 04:18:46.673190', 'step': 4904, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:46.708078', 'step': 4904, 'epoch': 1} {'type': 'loss', 'content': 0.13093708455562592, 'timestamp': '2025-10-01 04:18:46.717564', 'step': 4905, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:18:46.748420', 'step': 4905, 'epoch': 1} {'type': 'loss', 'content': 0.13374941051006317, 'timestamp': '2025-10-01 04:18:46.750949', 'step': 4906, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:46.799698', 'step': 4906, 'epoch': 1} {'type': 'loss', 'content': 0.1287529319524765, 'timestamp': '2025-10-01 04:18:46.802160', 'step': 4907, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:46.858860', 'step': 4907, 'epoch': 1} {'type': 'loss', 'content': 0.16105927526950836, 'timestamp': '2025-10-01 04:18:46.882396', 'step': 4908, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:18:46.918113', 'step': 4908, 'epoch': 1} {'type': 'loss', 'content': 0.1398967206478119, 'timestamp': '2025-10-01 04:18:46.923429', 'step': 4909, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:18:46.959089', 'step': 4909, 'epoch': 1} {'type': 'loss', 'content': 0.10215723514556885, 'timestamp': '2025-10-01 04:18:46.961357', 'step': 4910, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:47.005167', 'step': 4910, 'epoch': 1} {'type': 'loss', 'content': 0.27593275904655457, 'timestamp': '2025-10-01 04:18:47.007299', 'step': 4911, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:47.050483', 'step': 4911, 'epoch': 1} {'type': 'loss', 'content': 0.18892104923725128, 'timestamp': '2025-10-01 04:18:47.074264', 'step': 4912, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:47.108857', 'step': 4912, 'epoch': 1} {'type': 'loss', 'content': 0.12662345170974731, 'timestamp': '2025-10-01 04:18:47.111178', 'step': 4913, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:47.142559', 'step': 4913, 'epoch': 1} {'type': 'loss', 'content': 0.15594734251499176, 'timestamp': '2025-10-01 04:18:47.145024', 'step': 4914, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:18:47.177959', 'step': 4914, 'epoch': 1} {'type': 'loss', 'content': 0.19996392726898193, 'timestamp': '2025-10-01 04:18:47.180445', 'step': 4915, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:47.214640', 'step': 4915, 'epoch': 1} {'type': 'loss', 'content': 0.14033623039722443, 'timestamp': '2025-10-01 04:18:47.238681', 'step': 4916, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:47.271149', 'step': 4916, 'epoch': 1} {'type': 'loss', 'content': 0.16505436599254608, 'timestamp': '2025-10-01 04:18:47.273291', 'step': 4917, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:47.305386', 'step': 4917, 'epoch': 1} {'type': 'loss', 'content': 0.12073501944541931, 'timestamp': '2025-10-01 04:18:47.307746', 'step': 4918, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:18:47.339347', 'step': 4918, 'epoch': 1} {'type': 'loss', 'content': 0.14080825448036194, 'timestamp': '2025-10-01 04:18:47.341751', 'step': 4919, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:47.374327', 'step': 4919, 'epoch': 1} {'type': 'loss', 'content': 0.1224445104598999, 'timestamp': '2025-10-01 04:18:47.398313', 'step': 4920, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:47.431697', 'step': 4920, 'epoch': 1} {'type': 'loss', 'content': 0.15342043340206146, 'timestamp': '2025-10-01 04:18:47.433749', 'step': 4921, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:47.466342', 'step': 4921, 'epoch': 1} {'type': 'loss', 'content': 0.10150208324193954, 'timestamp': '2025-10-01 04:18:47.468313', 'step': 4922, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:47.504740', 'step': 4922, 'epoch': 1} {'type': 'loss', 'content': 0.18165752291679382, 'timestamp': '2025-10-01 04:18:47.507345', 'step': 4923, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:18:47.539387', 'step': 4923, 'epoch': 1} {'type': 'loss', 'content': 0.10903900861740112, 'timestamp': '2025-10-01 04:18:47.563259', 'step': 4924, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:18:47.598704', 'step': 4924, 'epoch': 1} {'type': 'loss', 'content': 0.13932761549949646, 'timestamp': '2025-10-01 04:18:47.604531', 'step': 4925, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:47.648843', 'step': 4925, 'epoch': 1} {'type': 'loss', 'content': 0.1661834865808487, 'timestamp': '2025-10-01 04:18:47.656674', 'step': 4926, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:18:47.699900', 'step': 4926, 'epoch': 1} {'type': 'loss', 'content': 0.2725522220134735, 'timestamp': '2025-10-01 04:18:47.702946', 'step': 4927, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:18:47.737318', 'step': 4927, 'epoch': 1} {'type': 'loss', 'content': 0.16922211647033691, 'timestamp': '2025-10-01 04:18:47.760978', 'step': 4928, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:47.793963', 'step': 4928, 'epoch': 1} {'type': 'loss', 'content': 0.18125513195991516, 'timestamp': '2025-10-01 04:18:47.795674', 'step': 4929, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:18:47.826007', 'step': 4929, 'epoch': 1} {'type': 'loss', 'content': 0.13188576698303223, 'timestamp': '2025-10-01 04:18:47.828374', 'step': 4930, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:18:47.859070', 'step': 4930, 'epoch': 1} {'type': 'loss', 'content': 0.13533137738704681, 'timestamp': '2025-10-01 04:18:47.861049', 'step': 4931, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:18:47.892612', 'step': 4931, 'epoch': 1} {'type': 'loss', 'content': 0.12230368703603745, 'timestamp': '2025-10-01 04:18:47.916116', 'step': 4932, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:47.947689', 'step': 4932, 'epoch': 1} {'type': 'loss', 'content': 0.08800922334194183, 'timestamp': '2025-10-01 04:18:47.949613', 'step': 4933, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:47.981856', 'step': 4933, 'epoch': 1} {'type': 'loss', 'content': 0.16217680275440216, 'timestamp': '2025-10-01 04:18:47.984626', 'step': 4934, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:48.015619', 'step': 4934, 'epoch': 1} {'type': 'loss', 'content': 0.14558030664920807, 'timestamp': '2025-10-01 04:18:48.017755', 'step': 4935, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:48.048688', 'step': 4935, 'epoch': 1} {'type': 'loss', 'content': 0.17679144442081451, 'timestamp': '2025-10-01 04:18:48.072272', 'step': 4936, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:48.103075', 'step': 4936, 'epoch': 1} {'type': 'loss', 'content': 0.1595037579536438, 'timestamp': '2025-10-01 04:18:48.105093', 'step': 4937, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:48.136773', 'step': 4937, 'epoch': 1} {'type': 'loss', 'content': 0.15816953778266907, 'timestamp': '2025-10-01 04:18:48.138714', 'step': 4938, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:18:48.169518', 'step': 4938, 'epoch': 1} {'type': 'loss', 'content': 0.15891635417938232, 'timestamp': '2025-10-01 04:18:48.171627', 'step': 4939, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:18:48.209247', 'step': 4939, 'epoch': 1} {'type': 'loss', 'content': 0.0987742468714714, 'timestamp': '2025-10-01 04:18:48.234515', 'step': 4940, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:48.266847', 'step': 4940, 'epoch': 1} {'type': 'loss', 'content': 0.12849073112010956, 'timestamp': '2025-10-01 04:18:48.274837', 'step': 4941, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:48.306008', 'step': 4941, 'epoch': 1} {'type': 'loss', 'content': 0.22477754950523376, 'timestamp': '2025-10-01 04:18:48.307974', 'step': 4942, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:48.339309', 'step': 4942, 'epoch': 1} {'type': 'loss', 'content': 0.0969403013586998, 'timestamp': '2025-10-01 04:18:48.341455', 'step': 4943, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:18:48.373874', 'step': 4943, 'epoch': 1} {'type': 'loss', 'content': 0.24662663042545319, 'timestamp': '2025-10-01 04:18:48.397455', 'step': 4944, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:48.430189', 'step': 4944, 'epoch': 1} {'type': 'loss', 'content': 0.15585485100746155, 'timestamp': '2025-10-01 04:18:48.432150', 'step': 4945, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:18:48.463435', 'step': 4945, 'epoch': 1} {'type': 'loss', 'content': 0.3211210072040558, 'timestamp': '2025-10-01 04:18:48.466206', 'step': 4946, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:18:48.498506', 'step': 4946, 'epoch': 1} {'type': 'loss', 'content': 0.200946643948555, 'timestamp': '2025-10-01 04:18:48.500950', 'step': 4947, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:18:48.534521', 'step': 4947, 'epoch': 1} {'type': 'loss', 'content': 0.15616543591022491, 'timestamp': '2025-10-01 04:18:48.565790', 'step': 4948, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:48.598405', 'step': 4948, 'epoch': 1} {'type': 'loss', 'content': 0.19461333751678467, 'timestamp': '2025-10-01 04:18:48.600549', 'step': 4949, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:48.636341', 'step': 4949, 'epoch': 1} {'type': 'loss', 'content': 0.0885990634560585, 'timestamp': '2025-10-01 04:18:48.638430', 'step': 4950, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:48.670940', 'step': 4950, 'epoch': 1} {'type': 'loss', 'content': 0.11360406130552292, 'timestamp': '2025-10-01 04:18:48.672937', 'step': 4951, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:18:48.705890', 'step': 4951, 'epoch': 1} {'type': 'loss', 'content': 0.14816971123218536, 'timestamp': '2025-10-01 04:18:48.729601', 'step': 4952, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:18:48.759833', 'step': 4952, 'epoch': 1} {'type': 'loss', 'content': 0.18665827810764313, 'timestamp': '2025-10-01 04:18:48.761771', 'step': 4953, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:48.793171', 'step': 4953, 'epoch': 1} {'type': 'loss', 'content': 0.12370242178440094, 'timestamp': '2025-10-01 04:18:48.795146', 'step': 4954, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:18:48.829706', 'step': 4954, 'epoch': 1} {'type': 'loss', 'content': 0.14271040260791779, 'timestamp': '2025-10-01 04:18:48.831815', 'step': 4955, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:18:48.863091', 'step': 4955, 'epoch': 1} {'type': 'loss', 'content': 0.2514873147010803, 'timestamp': '2025-10-01 04:18:48.887777', 'step': 4956, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:18:48.921485', 'step': 4956, 'epoch': 1} {'type': 'loss', 'content': 0.1187136098742485, 'timestamp': '2025-10-01 04:18:48.923481', 'step': 4957, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:48.954866', 'step': 4957, 'epoch': 1} {'type': 'loss', 'content': 0.152564138174057, 'timestamp': '2025-10-01 04:18:48.957300', 'step': 4958, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:48.989449', 'step': 4958, 'epoch': 1} {'type': 'loss', 'content': 0.19001230597496033, 'timestamp': '2025-10-01 04:18:48.991452', 'step': 4959, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:49.027078', 'step': 4959, 'epoch': 1} {'type': 'loss', 'content': 0.14693665504455566, 'timestamp': '2025-10-01 04:18:49.050678', 'step': 4960, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:49.084882', 'step': 4960, 'epoch': 1} {'type': 'loss', 'content': 0.13323619961738586, 'timestamp': '2025-10-01 04:18:49.086786', 'step': 4961, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:18:49.118466', 'step': 4961, 'epoch': 1} {'type': 'loss', 'content': 0.17766407132148743, 'timestamp': '2025-10-01 04:18:49.120919', 'step': 4962, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:49.173928', 'step': 4962, 'epoch': 1} {'type': 'loss', 'content': 0.13646620512008667, 'timestamp': '2025-10-01 04:18:49.182292', 'step': 4963, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:18:49.223706', 'step': 4963, 'epoch': 1} {'type': 'loss', 'content': 0.13423505425453186, 'timestamp': '2025-10-01 04:18:49.249293', 'step': 4964, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:18:49.282273', 'step': 4964, 'epoch': 1} {'type': 'loss', 'content': 0.10533109307289124, 'timestamp': '2025-10-01 04:18:49.285543', 'step': 4965, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:49.318938', 'step': 4965, 'epoch': 1} {'type': 'loss', 'content': 0.20551617443561554, 'timestamp': '2025-10-01 04:18:49.320864', 'step': 4966, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:18:49.353125', 'step': 4966, 'epoch': 1} {'type': 'loss', 'content': 0.16442157328128815, 'timestamp': '2025-10-01 04:18:49.355159', 'step': 4967, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:49.387445', 'step': 4967, 'epoch': 1} {'type': 'loss', 'content': 0.0689849853515625, 'timestamp': '2025-10-01 04:18:49.420901', 'step': 4968, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:18:49.456164', 'step': 4968, 'epoch': 1} {'type': 'loss', 'content': 0.25916191935539246, 'timestamp': '2025-10-01 04:18:49.458213', 'step': 4969, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:49.492175', 'step': 4969, 'epoch': 1} {'type': 'loss', 'content': 0.17013564705848694, 'timestamp': '2025-10-01 04:18:49.494149', 'step': 4970, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:49.527692', 'step': 4970, 'epoch': 1} {'type': 'loss', 'content': 0.17281576991081238, 'timestamp': '2025-10-01 04:18:49.530114', 'step': 4971, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:49.577531', 'step': 4971, 'epoch': 1} {'type': 'loss', 'content': 0.08844495564699173, 'timestamp': '2025-10-01 04:18:49.601175', 'step': 4972, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:18:49.635898', 'step': 4972, 'epoch': 1} {'type': 'loss', 'content': 0.14029712975025177, 'timestamp': '2025-10-01 04:18:49.637910', 'step': 4973, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:49.669698', 'step': 4973, 'epoch': 1} {'type': 'loss', 'content': 0.19054099917411804, 'timestamp': '2025-10-01 04:18:49.671795', 'step': 4974, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:49.708623', 'step': 4974, 'epoch': 1} {'type': 'loss', 'content': 0.09974616020917892, 'timestamp': '2025-10-01 04:18:49.710643', 'step': 4975, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:18:49.744463', 'step': 4975, 'epoch': 1} {'type': 'loss', 'content': 0.09548629820346832, 'timestamp': '2025-10-01 04:18:49.767893', 'step': 4976, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:49.801218', 'step': 4976, 'epoch': 1} {'type': 'loss', 'content': 0.15775592625141144, 'timestamp': '2025-10-01 04:18:49.803530', 'step': 4977, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:18:49.838256', 'step': 4977, 'epoch': 1} {'type': 'loss', 'content': 0.11917343735694885, 'timestamp': '2025-10-01 04:18:49.842125', 'step': 4978, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:18:49.882263', 'step': 4978, 'epoch': 1} {'type': 'loss', 'content': 0.28073379397392273, 'timestamp': '2025-10-01 04:18:49.884583', 'step': 4979, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:18:49.922010', 'step': 4979, 'epoch': 1} {'type': 'loss', 'content': 0.121038056910038, 'timestamp': '2025-10-01 04:18:49.950773', 'step': 4980, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:18:49.983351', 'step': 4980, 'epoch': 1} {'type': 'loss', 'content': 0.1380334198474884, 'timestamp': '2025-10-01 04:18:49.988829', 'step': 4981, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:18:50.022117', 'step': 4981, 'epoch': 1} {'type': 'loss', 'content': 0.10355886816978455, 'timestamp': '2025-10-01 04:18:50.024979', 'step': 4982, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:18:50.061274', 'step': 4982, 'epoch': 1} {'type': 'loss', 'content': 0.1010100319981575, 'timestamp': '2025-10-01 04:18:50.063472', 'step': 4983, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:50.102361', 'step': 4983, 'epoch': 1} {'type': 'loss', 'content': 0.1945534497499466, 'timestamp': '2025-10-01 04:18:50.125982', 'step': 4984, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:18:50.157172', 'step': 4984, 'epoch': 1} {'type': 'loss', 'content': 0.22926372289657593, 'timestamp': '2025-10-01 04:18:50.159146', 'step': 4985, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:18:50.190214', 'step': 4985, 'epoch': 1} {'type': 'loss', 'content': 0.17377027869224548, 'timestamp': '2025-10-01 04:18:50.192887', 'step': 4986, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:18:50.226859', 'step': 4986, 'epoch': 1} {'type': 'loss', 'content': 0.15436381101608276, 'timestamp': '2025-10-01 04:18:50.228901', 'step': 4987, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:18:50.261735', 'step': 4987, 'epoch': 1} {'type': 'loss', 'content': 0.10631132870912552, 'timestamp': '2025-10-01 04:18:50.285169', 'step': 4988, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:50.315078', 'step': 4988, 'epoch': 1} {'type': 'loss', 'content': 0.0871405154466629, 'timestamp': '2025-10-01 04:18:50.317157', 'step': 4989, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:50.352563', 'step': 4989, 'epoch': 1} {'type': 'loss', 'content': 0.10889474302530289, 'timestamp': '2025-10-01 04:18:50.354889', 'step': 4990, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:50.387655', 'step': 4990, 'epoch': 1} {'type': 'loss', 'content': 0.22991691529750824, 'timestamp': '2025-10-01 04:18:50.389950', 'step': 4991, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:18:50.422615', 'step': 4991, 'epoch': 1} {'type': 'loss', 'content': 0.2335638552904129, 'timestamp': '2025-10-01 04:18:50.451968', 'step': 4992, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:50.484904', 'step': 4992, 'epoch': 1} {'type': 'loss', 'content': 0.12577588856220245, 'timestamp': '2025-10-01 04:18:50.496934', 'step': 4993, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:50.535857', 'step': 4993, 'epoch': 1} {'type': 'loss', 'content': 0.25101980566978455, 'timestamp': '2025-10-01 04:18:50.539162', 'step': 4994, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:50.575920', 'step': 4994, 'epoch': 1} {'type': 'loss', 'content': 0.15836700797080994, 'timestamp': '2025-10-01 04:18:50.577993', 'step': 4995, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:18:50.622567', 'step': 4995, 'epoch': 1} {'type': 'loss', 'content': 0.14789918065071106, 'timestamp': '2025-10-01 04:18:50.646474', 'step': 4996, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:50.679203', 'step': 4996, 'epoch': 1} {'type': 'loss', 'content': 0.14393195509910583, 'timestamp': '2025-10-01 04:18:50.681128', 'step': 4997, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:18:50.728209', 'step': 4997, 'epoch': 1} {'type': 'loss', 'content': 0.1393187791109085, 'timestamp': '2025-10-01 04:18:50.730384', 'step': 4998, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:50.765814', 'step': 4998, 'epoch': 1} {'type': 'loss', 'content': 0.2350332885980606, 'timestamp': '2025-10-01 04:18:50.767764', 'step': 4999, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-10-01 04:18:50.806813', 'step': 4999, 'epoch': 1} {'type': 'loss', 'content': 0.12332519143819809, 'timestamp': '2025-10-01 04:18:50.832430', 'step': 5000, 'epoch': 1} {'type': 'info', 'content': 'Checkpoint saved at step 5000', 'timestamp': '2025-10-01 04:18:56.237057', 'step': 5000, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:56.269381', 'step': 5000, 'epoch': 1} {'type': 'loss', 'content': 0.12517906725406647, 'timestamp': '2025-10-01 04:18:56.272178', 'step': 5001, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:56.304540', 'step': 5001, 'epoch': 1} {'type': 'loss', 'content': 0.13159726560115814, 'timestamp': '2025-10-01 04:18:56.307435', 'step': 5002, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:56.338596', 'step': 5002, 'epoch': 1} {'type': 'loss', 'content': 0.160233274102211, 'timestamp': '2025-10-01 04:18:56.342072', 'step': 5003, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:18:56.375940', 'step': 5003, 'epoch': 1} {'type': 'loss', 'content': 0.1400163471698761, 'timestamp': '2025-10-01 04:18:56.399727', 'step': 5004, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:18:56.432080', 'step': 5004, 'epoch': 1} {'type': 'loss', 'content': 0.16769851744174957, 'timestamp': '2025-10-01 04:18:56.433979', 'step': 5005, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:56.464557', 'step': 5005, 'epoch': 1} {'type': 'loss', 'content': 0.1968800127506256, 'timestamp': '2025-10-01 04:18:56.466914', 'step': 5006, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:18:56.498532', 'step': 5006, 'epoch': 1} {'type': 'loss', 'content': 0.1523379534482956, 'timestamp': '2025-10-01 04:18:56.500734', 'step': 5007, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:18:56.531453', 'step': 5007, 'epoch': 1} {'type': 'loss', 'content': 0.16184261441230774, 'timestamp': '2025-10-01 04:18:56.555506', 'step': 5008, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:18:56.588082', 'step': 5008, 'epoch': 1} {'type': 'loss', 'content': 0.17008496820926666, 'timestamp': '2025-10-01 04:18:56.590859', 'step': 5009, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:56.621876', 'step': 5009, 'epoch': 1} {'type': 'loss', 'content': 0.1419879049062729, 'timestamp': '2025-10-01 04:18:56.623932', 'step': 5010, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:18:56.655345', 'step': 5010, 'epoch': 1} {'type': 'loss', 'content': 0.19778972864151, 'timestamp': '2025-10-01 04:18:56.657644', 'step': 5011, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:56.689603', 'step': 5011, 'epoch': 1} {'type': 'loss', 'content': 0.12813526391983032, 'timestamp': '2025-10-01 04:18:56.713170', 'step': 5012, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:18:56.744345', 'step': 5012, 'epoch': 1} {'type': 'loss', 'content': 0.16987253725528717, 'timestamp': '2025-10-01 04:18:56.747784', 'step': 5013, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:56.779003', 'step': 5013, 'epoch': 1} {'type': 'loss', 'content': 0.14092232286930084, 'timestamp': '2025-10-01 04:18:56.781144', 'step': 5014, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:56.812544', 'step': 5014, 'epoch': 1} {'type': 'loss', 'content': 0.1053183451294899, 'timestamp': '2025-10-01 04:18:56.814602', 'step': 5015, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:56.845898', 'step': 5015, 'epoch': 1} {'type': 'loss', 'content': 0.14335596561431885, 'timestamp': '2025-10-01 04:18:56.869641', 'step': 5016, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:56.900176', 'step': 5016, 'epoch': 1} {'type': 'loss', 'content': 0.22989138960838318, 'timestamp': '2025-10-01 04:18:56.902252', 'step': 5017, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:56.933595', 'step': 5017, 'epoch': 1} {'type': 'loss', 'content': 0.16811305284500122, 'timestamp': '2025-10-01 04:18:56.936504', 'step': 5018, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:56.971975', 'step': 5018, 'epoch': 1} {'type': 'loss', 'content': 0.13186508417129517, 'timestamp': '2025-10-01 04:18:56.974074', 'step': 5019, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:18:57.007167', 'step': 5019, 'epoch': 1} {'type': 'loss', 'content': 0.20730921626091003, 'timestamp': '2025-10-01 04:18:57.030667', 'step': 5020, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:18:57.088063', 'step': 5020, 'epoch': 1} {'type': 'loss', 'content': 0.10076282918453217, 'timestamp': '2025-10-01 04:18:57.090087', 'step': 5021, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:57.122665', 'step': 5021, 'epoch': 1} {'type': 'loss', 'content': 0.14097441732883453, 'timestamp': '2025-10-01 04:18:57.132462', 'step': 5022, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:57.162994', 'step': 5022, 'epoch': 1} {'type': 'loss', 'content': 0.10342434048652649, 'timestamp': '2025-10-01 04:18:57.165340', 'step': 5023, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:18:57.199416', 'step': 5023, 'epoch': 1} {'type': 'loss', 'content': 0.14320369064807892, 'timestamp': '2025-10-01 04:18:57.223009', 'step': 5024, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:18:57.252864', 'step': 5024, 'epoch': 1} {'type': 'loss', 'content': 0.12259942293167114, 'timestamp': '2025-10-01 04:18:57.254642', 'step': 5025, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:18:57.285680', 'step': 5025, 'epoch': 1} {'type': 'loss', 'content': 0.18064774572849274, 'timestamp': '2025-10-01 04:18:57.288541', 'step': 5026, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:57.320247', 'step': 5026, 'epoch': 1} {'type': 'loss', 'content': 0.22235509753227234, 'timestamp': '2025-10-01 04:18:57.322348', 'step': 5027, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:18:57.352072', 'step': 5027, 'epoch': 1} {'type': 'loss', 'content': 0.1804298758506775, 'timestamp': '2025-10-01 04:18:57.375490', 'step': 5028, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:18:57.405847', 'step': 5028, 'epoch': 1} {'type': 'loss', 'content': 0.17438901960849762, 'timestamp': '2025-10-01 04:18:57.408057', 'step': 5029, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:57.439592', 'step': 5029, 'epoch': 1} {'type': 'loss', 'content': 0.15454836189746857, 'timestamp': '2025-10-01 04:18:57.441770', 'step': 5030, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:18:57.474059', 'step': 5030, 'epoch': 1} {'type': 'loss', 'content': 0.12929801642894745, 'timestamp': '2025-10-01 04:18:57.477545', 'step': 5031, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:57.512539', 'step': 5031, 'epoch': 1} {'type': 'loss', 'content': 0.14325077831745148, 'timestamp': '2025-10-01 04:18:57.536060', 'step': 5032, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:18:57.569580', 'step': 5032, 'epoch': 1} {'type': 'loss', 'content': 0.18160900473594666, 'timestamp': '2025-10-01 04:18:57.572863', 'step': 5033, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:57.614376', 'step': 5033, 'epoch': 1} {'type': 'loss', 'content': 0.1473577916622162, 'timestamp': '2025-10-01 04:18:57.616390', 'step': 5034, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:57.650674', 'step': 5034, 'epoch': 1} {'type': 'loss', 'content': 0.09459388256072998, 'timestamp': '2025-10-01 04:18:57.652783', 'step': 5035, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:57.684198', 'step': 5035, 'epoch': 1} {'type': 'loss', 'content': 0.2975517809391022, 'timestamp': '2025-10-01 04:18:57.707857', 'step': 5036, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:18:57.738621', 'step': 5036, 'epoch': 1} {'type': 'loss', 'content': 0.1262391358613968, 'timestamp': '2025-10-01 04:18:57.740853', 'step': 5037, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:18:57.771414', 'step': 5037, 'epoch': 1} {'type': 'loss', 'content': 0.20748624205589294, 'timestamp': '2025-10-01 04:18:57.773324', 'step': 5038, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:57.803413', 'step': 5038, 'epoch': 1} {'type': 'loss', 'content': 0.24965739250183105, 'timestamp': '2025-10-01 04:18:57.805422', 'step': 5039, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:57.835850', 'step': 5039, 'epoch': 1} {'type': 'loss', 'content': 0.18254485726356506, 'timestamp': '2025-10-01 04:18:57.859346', 'step': 5040, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:57.890555', 'step': 5040, 'epoch': 1} {'type': 'loss', 'content': 0.13726666569709778, 'timestamp': '2025-10-01 04:18:57.892849', 'step': 5041, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:18:57.926201', 'step': 5041, 'epoch': 1} {'type': 'loss', 'content': 0.14656877517700195, 'timestamp': '2025-10-01 04:18:57.928132', 'step': 5042, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:57.960550', 'step': 5042, 'epoch': 1} {'type': 'loss', 'content': 0.20423077046871185, 'timestamp': '2025-10-01 04:18:57.962573', 'step': 5043, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:57.994860', 'step': 5043, 'epoch': 1} {'type': 'loss', 'content': 0.19494277238845825, 'timestamp': '2025-10-01 04:18:58.018565', 'step': 5044, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:58.048855', 'step': 5044, 'epoch': 1} {'type': 'loss', 'content': 0.20718342065811157, 'timestamp': '2025-10-01 04:18:58.050924', 'step': 5045, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:18:58.085092', 'step': 5045, 'epoch': 1} {'type': 'loss', 'content': 0.12744231522083282, 'timestamp': '2025-10-01 04:18:58.087836', 'step': 5046, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:58.118429', 'step': 5046, 'epoch': 1} {'type': 'loss', 'content': 0.21224345266819, 'timestamp': '2025-10-01 04:18:58.120407', 'step': 5047, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:58.150448', 'step': 5047, 'epoch': 1} {'type': 'loss', 'content': 0.13224266469478607, 'timestamp': '2025-10-01 04:18:58.173838', 'step': 5048, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:18:58.205973', 'step': 5048, 'epoch': 1} {'type': 'loss', 'content': 0.13638493418693542, 'timestamp': '2025-10-01 04:18:58.208590', 'step': 5049, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:58.246849', 'step': 5049, 'epoch': 1} {'type': 'loss', 'content': 0.15023289620876312, 'timestamp': '2025-10-01 04:18:58.248897', 'step': 5050, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:58.280088', 'step': 5050, 'epoch': 1} {'type': 'loss', 'content': 0.1911451518535614, 'timestamp': '2025-10-01 04:18:58.282465', 'step': 5051, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:58.311857', 'step': 5051, 'epoch': 1} {'type': 'loss', 'content': 0.17872688174247742, 'timestamp': '2025-10-01 04:18:58.335505', 'step': 5052, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:58.365111', 'step': 5052, 'epoch': 1} {'type': 'loss', 'content': 0.12896908819675446, 'timestamp': '2025-10-01 04:18:58.367007', 'step': 5053, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:18:58.398860', 'step': 5053, 'epoch': 1} {'type': 'loss', 'content': 0.12643593549728394, 'timestamp': '2025-10-01 04:18:58.401229', 'step': 5054, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:18:58.432423', 'step': 5054, 'epoch': 1} {'type': 'loss', 'content': 0.18903468549251556, 'timestamp': '2025-10-01 04:18:58.434581', 'step': 5055, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:58.464460', 'step': 5055, 'epoch': 1} {'type': 'loss', 'content': 0.08471282571554184, 'timestamp': '2025-10-01 04:18:58.493897', 'step': 5056, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:18:58.524143', 'step': 5056, 'epoch': 1} {'type': 'loss', 'content': 0.3193246126174927, 'timestamp': '2025-10-01 04:18:58.527520', 'step': 5057, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:58.558710', 'step': 5057, 'epoch': 1} {'type': 'loss', 'content': 0.11321870237588882, 'timestamp': '2025-10-01 04:18:58.560694', 'step': 5058, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:18:58.592663', 'step': 5058, 'epoch': 1} {'type': 'loss', 'content': 0.13942952454090118, 'timestamp': '2025-10-01 04:18:58.595530', 'step': 5059, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:18:58.625561', 'step': 5059, 'epoch': 1} {'type': 'loss', 'content': 0.06833699345588684, 'timestamp': '2025-10-01 04:18:58.648975', 'step': 5060, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:58.679070', 'step': 5060, 'epoch': 1} {'type': 'loss', 'content': 0.0890003889799118, 'timestamp': '2025-10-01 04:18:58.682433', 'step': 5061, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:58.716659', 'step': 5061, 'epoch': 1} {'type': 'loss', 'content': 0.16102375090122223, 'timestamp': '2025-10-01 04:18:58.718686', 'step': 5062, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:58.750972', 'step': 5062, 'epoch': 1} {'type': 'loss', 'content': 0.1337662786245346, 'timestamp': '2025-10-01 04:18:58.753044', 'step': 5063, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:58.785456', 'step': 5063, 'epoch': 1} {'type': 'loss', 'content': 0.2436629831790924, 'timestamp': '2025-10-01 04:18:58.808962', 'step': 5064, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:58.844914', 'step': 5064, 'epoch': 1} {'type': 'loss', 'content': 0.1487547904253006, 'timestamp': '2025-10-01 04:18:58.848194', 'step': 5065, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:58.880604', 'step': 5065, 'epoch': 1} {'type': 'loss', 'content': 0.2319907695055008, 'timestamp': '2025-10-01 04:18:58.882766', 'step': 5066, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:58.916259', 'step': 5066, 'epoch': 1} {'type': 'loss', 'content': 0.22043387591838837, 'timestamp': '2025-10-01 04:18:58.918421', 'step': 5067, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:58.950479', 'step': 5067, 'epoch': 1} {'type': 'loss', 'content': 0.12438935041427612, 'timestamp': '2025-10-01 04:18:58.974428', 'step': 5068, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:18:59.007305', 'step': 5068, 'epoch': 1} {'type': 'loss', 'content': 0.17313773930072784, 'timestamp': '2025-10-01 04:18:59.009349', 'step': 5069, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:59.041802', 'step': 5069, 'epoch': 1} {'type': 'loss', 'content': 0.2022615224123001, 'timestamp': '2025-10-01 04:18:59.043783', 'step': 5070, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:59.073571', 'step': 5070, 'epoch': 1} {'type': 'loss', 'content': 0.13246135413646698, 'timestamp': '2025-10-01 04:18:59.082150', 'step': 5071, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:59.115360', 'step': 5071, 'epoch': 1} {'type': 'loss', 'content': 0.054265640676021576, 'timestamp': '2025-10-01 04:18:59.138771', 'step': 5072, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:59.169259', 'step': 5072, 'epoch': 1} {'type': 'loss', 'content': 0.12357644736766815, 'timestamp': '2025-10-01 04:18:59.171671', 'step': 5073, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:18:59.201379', 'step': 5073, 'epoch': 1} {'type': 'loss', 'content': 0.17157389223575592, 'timestamp': '2025-10-01 04:18:59.203334', 'step': 5074, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:59.233000', 'step': 5074, 'epoch': 1} {'type': 'loss', 'content': 0.12966060638427734, 'timestamp': '2025-10-01 04:18:59.235257', 'step': 5075, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:59.267410', 'step': 5075, 'epoch': 1} {'type': 'loss', 'content': 0.16022884845733643, 'timestamp': '2025-10-01 04:18:59.294481', 'step': 5076, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:59.324803', 'step': 5076, 'epoch': 1} {'type': 'loss', 'content': 0.11575109511613846, 'timestamp': '2025-10-01 04:18:59.326740', 'step': 5077, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:59.360543', 'step': 5077, 'epoch': 1} {'type': 'loss', 'content': 0.12945376336574554, 'timestamp': '2025-10-01 04:18:59.362918', 'step': 5078, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:18:59.398430', 'step': 5078, 'epoch': 1} {'type': 'loss', 'content': 0.19317477941513062, 'timestamp': '2025-10-01 04:18:59.405215', 'step': 5079, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:59.437196', 'step': 5079, 'epoch': 1} {'type': 'loss', 'content': 0.09977959096431732, 'timestamp': '2025-10-01 04:18:59.460819', 'step': 5080, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:18:59.491097', 'step': 5080, 'epoch': 1} {'type': 'loss', 'content': 0.17255425453186035, 'timestamp': '2025-10-01 04:18:59.493352', 'step': 5081, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:59.524218', 'step': 5081, 'epoch': 1} {'type': 'loss', 'content': 0.20632028579711914, 'timestamp': '2025-10-01 04:18:59.527851', 'step': 5082, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:18:59.562329', 'step': 5082, 'epoch': 1} {'type': 'loss', 'content': 0.12554629147052765, 'timestamp': '2025-10-01 04:18:59.564769', 'step': 5083, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:59.599734', 'step': 5083, 'epoch': 1} {'type': 'loss', 'content': 0.11968085914850235, 'timestamp': '2025-10-01 04:18:59.623257', 'step': 5084, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:59.653917', 'step': 5084, 'epoch': 1} {'type': 'loss', 'content': 0.16935089230537415, 'timestamp': '2025-10-01 04:18:59.656053', 'step': 5085, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:59.686367', 'step': 5085, 'epoch': 1} {'type': 'loss', 'content': 0.10153363645076752, 'timestamp': '2025-10-01 04:18:59.688416', 'step': 5086, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:18:59.719387', 'step': 5086, 'epoch': 1} {'type': 'loss', 'content': 0.23834997415542603, 'timestamp': '2025-10-01 04:18:59.721329', 'step': 5087, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:59.752686', 'step': 5087, 'epoch': 1} {'type': 'loss', 'content': 0.12192347645759583, 'timestamp': '2025-10-01 04:18:59.776123', 'step': 5088, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:18:59.806449', 'step': 5088, 'epoch': 1} {'type': 'loss', 'content': 0.10109756886959076, 'timestamp': '2025-10-01 04:18:59.808604', 'step': 5089, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:59.854311', 'step': 5089, 'epoch': 1} {'type': 'loss', 'content': 0.21692734956741333, 'timestamp': '2025-10-01 04:18:59.856356', 'step': 5090, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:18:59.886695', 'step': 5090, 'epoch': 1} {'type': 'loss', 'content': 0.24476580321788788, 'timestamp': '2025-10-01 04:18:59.888669', 'step': 5091, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:18:59.919185', 'step': 5091, 'epoch': 1} {'type': 'loss', 'content': 0.24967941641807556, 'timestamp': '2025-10-01 04:18:59.946495', 'step': 5092, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:18:59.988332', 'step': 5092, 'epoch': 1} {'type': 'loss', 'content': 0.0924336239695549, 'timestamp': '2025-10-01 04:18:59.992761', 'step': 5093, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:00.028086', 'step': 5093, 'epoch': 1} {'type': 'loss', 'content': 0.14642590284347534, 'timestamp': '2025-10-01 04:19:00.030331', 'step': 5094, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:00.063792', 'step': 5094, 'epoch': 1} {'type': 'loss', 'content': 0.21426790952682495, 'timestamp': '2025-10-01 04:19:00.066029', 'step': 5095, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:00.098032', 'step': 5095, 'epoch': 1} {'type': 'loss', 'content': 0.21123634278774261, 'timestamp': '2025-10-01 04:19:00.122060', 'step': 5096, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:00.157800', 'step': 5096, 'epoch': 1} {'type': 'loss', 'content': 0.11337196081876755, 'timestamp': '2025-10-01 04:19:00.160146', 'step': 5097, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:00.194012', 'step': 5097, 'epoch': 1} {'type': 'loss', 'content': 0.13179616630077362, 'timestamp': '2025-10-01 04:19:00.196001', 'step': 5098, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:00.227750', 'step': 5098, 'epoch': 1} {'type': 'loss', 'content': 0.171146959066391, 'timestamp': '2025-10-01 04:19:00.233883', 'step': 5099, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:19:00.265655', 'step': 5099, 'epoch': 1} {'type': 'loss', 'content': 0.19080501794815063, 'timestamp': '2025-10-01 04:19:00.289248', 'step': 5100, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:19:00.321621', 'step': 5100, 'epoch': 1} {'type': 'loss', 'content': 0.15943142771720886, 'timestamp': '2025-10-01 04:19:00.324885', 'step': 5101, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:00.357032', 'step': 5101, 'epoch': 1} {'type': 'loss', 'content': 0.20475077629089355, 'timestamp': '2025-10-01 04:19:00.359021', 'step': 5102, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:00.407264', 'step': 5102, 'epoch': 1} {'type': 'loss', 'content': 0.23157575726509094, 'timestamp': '2025-10-01 04:19:00.410097', 'step': 5103, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:00.443121', 'step': 5103, 'epoch': 1} {'type': 'loss', 'content': 0.22547245025634766, 'timestamp': '2025-10-01 04:19:00.466928', 'step': 5104, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:00.498862', 'step': 5104, 'epoch': 1} {'type': 'loss', 'content': 0.17482143640518188, 'timestamp': '2025-10-01 04:19:00.500933', 'step': 5105, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:00.532999', 'step': 5105, 'epoch': 1} {'type': 'loss', 'content': 0.12711574137210846, 'timestamp': '2025-10-01 04:19:00.535762', 'step': 5106, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:00.567563', 'step': 5106, 'epoch': 1} {'type': 'loss', 'content': 0.1528298556804657, 'timestamp': '2025-10-01 04:19:00.569643', 'step': 5107, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:00.601807', 'step': 5107, 'epoch': 1} {'type': 'loss', 'content': 0.18430352210998535, 'timestamp': '2025-10-01 04:19:00.625444', 'step': 5108, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:00.658136', 'step': 5108, 'epoch': 1} {'type': 'loss', 'content': 0.2382512390613556, 'timestamp': '2025-10-01 04:19:00.660319', 'step': 5109, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:00.696692', 'step': 5109, 'epoch': 1} {'type': 'loss', 'content': 0.26200127601623535, 'timestamp': '2025-10-01 04:19:00.698875', 'step': 5110, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:00.730317', 'step': 5110, 'epoch': 1} {'type': 'loss', 'content': 0.16958659887313843, 'timestamp': '2025-10-01 04:19:00.732221', 'step': 5111, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:00.767568', 'step': 5111, 'epoch': 1} {'type': 'loss', 'content': 0.12221387028694153, 'timestamp': '2025-10-01 04:19:00.791133', 'step': 5112, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:00.823452', 'step': 5112, 'epoch': 1} {'type': 'loss', 'content': 0.1741098314523697, 'timestamp': '2025-10-01 04:19:00.825446', 'step': 5113, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:19:00.855930', 'step': 5113, 'epoch': 1} {'type': 'loss', 'content': 0.25355255603790283, 'timestamp': '2025-10-01 04:19:00.858222', 'step': 5114, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:19:00.888685', 'step': 5114, 'epoch': 1} {'type': 'loss', 'content': 0.14703549444675446, 'timestamp': '2025-10-01 04:19:00.890912', 'step': 5115, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:00.921862', 'step': 5115, 'epoch': 1} {'type': 'loss', 'content': 0.18186475336551666, 'timestamp': '2025-10-01 04:19:00.945323', 'step': 5116, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:19:00.977224', 'step': 5116, 'epoch': 1} {'type': 'loss', 'content': 0.12518133223056793, 'timestamp': '2025-10-01 04:19:00.979236', 'step': 5117, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:01.009622', 'step': 5117, 'epoch': 1} {'type': 'loss', 'content': 0.22957868874073029, 'timestamp': '2025-10-01 04:19:01.011667', 'step': 5118, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:01.042447', 'step': 5118, 'epoch': 1} {'type': 'loss', 'content': 0.21464452147483826, 'timestamp': '2025-10-01 04:19:01.044469', 'step': 5119, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:01.078100', 'step': 5119, 'epoch': 1} {'type': 'loss', 'content': 0.21143078804016113, 'timestamp': '2025-10-01 04:19:01.101586', 'step': 5120, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:01.133633', 'step': 5120, 'epoch': 1} {'type': 'loss', 'content': 0.15502768754959106, 'timestamp': '2025-10-01 04:19:01.135856', 'step': 5121, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:01.166147', 'step': 5121, 'epoch': 1} {'type': 'loss', 'content': 0.2245447039604187, 'timestamp': '2025-10-01 04:19:01.168240', 'step': 5122, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:01.200918', 'step': 5122, 'epoch': 1} {'type': 'loss', 'content': 0.11788347363471985, 'timestamp': '2025-10-01 04:19:01.203256', 'step': 5123, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:01.235947', 'step': 5123, 'epoch': 1} {'type': 'loss', 'content': 0.11248242110013962, 'timestamp': '2025-10-01 04:19:01.259640', 'step': 5124, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:01.292320', 'step': 5124, 'epoch': 1} {'type': 'loss', 'content': 0.13403962552547455, 'timestamp': '2025-10-01 04:19:01.294424', 'step': 5125, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:19:01.326619', 'step': 5125, 'epoch': 1} {'type': 'loss', 'content': 0.16191573441028595, 'timestamp': '2025-10-01 04:19:01.329405', 'step': 5126, 'epoch': 1} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 949202279808}], 'timestamp': '2025-10-01 04:19:10.004780', 'step': 5126, 'epoch': 1} {'type': 'pplx', 'content': 9485.739159614697, 'timestamp': '2025-10-01 04:19:10.007387', 'step': 5126, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:10.037679', 'step': 5126, 'epoch': 1} {'type': 'loss', 'content': 0.10415645688772202, 'timestamp': '2025-10-01 04:19:10.039785', 'step': 5127, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:10.070073', 'step': 5127, 'epoch': 1} {'type': 'loss', 'content': 0.2521878182888031, 'timestamp': '2025-10-01 04:19:10.093631', 'step': 5128, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:10.124903', 'step': 5128, 'epoch': 1} {'type': 'loss', 'content': 0.11168064177036285, 'timestamp': '2025-10-01 04:19:10.127248', 'step': 5129, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:19:10.158458', 'step': 5129, 'epoch': 1} {'type': 'loss', 'content': 0.13494648039340973, 'timestamp': '2025-10-01 04:19:10.162375', 'step': 5130, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:10.201590', 'step': 5130, 'epoch': 1} {'type': 'loss', 'content': 0.12152969092130661, 'timestamp': '2025-10-01 04:19:10.203872', 'step': 5131, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:10.240964', 'step': 5131, 'epoch': 1} {'type': 'loss', 'content': 0.1134883463382721, 'timestamp': '2025-10-01 04:19:10.264633', 'step': 5132, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:10.298917', 'step': 5132, 'epoch': 1} {'type': 'loss', 'content': 0.17715035378932953, 'timestamp': '2025-10-01 04:19:10.302033', 'step': 5133, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:10.333034', 'step': 5133, 'epoch': 1} {'type': 'loss', 'content': 0.13629938662052155, 'timestamp': '2025-10-01 04:19:10.335092', 'step': 5134, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:10.366269', 'step': 5134, 'epoch': 1} {'type': 'loss', 'content': 0.16754119098186493, 'timestamp': '2025-10-01 04:19:10.368396', 'step': 5135, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:10.399793', 'step': 5135, 'epoch': 1} {'type': 'loss', 'content': 0.14466668665409088, 'timestamp': '2025-10-01 04:19:10.423816', 'step': 5136, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:10.455511', 'step': 5136, 'epoch': 1} {'type': 'loss', 'content': 0.11785203963518143, 'timestamp': '2025-10-01 04:19:10.457361', 'step': 5137, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:10.500986', 'step': 5137, 'epoch': 1} {'type': 'loss', 'content': 0.237807035446167, 'timestamp': '2025-10-01 04:19:10.503017', 'step': 5138, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:10.534529', 'step': 5138, 'epoch': 1} {'type': 'loss', 'content': 0.21298329532146454, 'timestamp': '2025-10-01 04:19:10.536524', 'step': 5139, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:19:10.567052', 'step': 5139, 'epoch': 1} {'type': 'loss', 'content': 0.13572441041469574, 'timestamp': '2025-10-01 04:19:10.590936', 'step': 5140, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:10.624113', 'step': 5140, 'epoch': 1} {'type': 'loss', 'content': 0.14898893237113953, 'timestamp': '2025-10-01 04:19:10.626139', 'step': 5141, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:10.660861', 'step': 5141, 'epoch': 1} {'type': 'loss', 'content': 0.14136049151420593, 'timestamp': '2025-10-01 04:19:10.662791', 'step': 5142, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:10.693996', 'step': 5142, 'epoch': 1} {'type': 'loss', 'content': 0.11596208810806274, 'timestamp': '2025-10-01 04:19:10.695992', 'step': 5143, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:10.727702', 'step': 5143, 'epoch': 1} {'type': 'loss', 'content': 0.14207309484481812, 'timestamp': '2025-10-01 04:19:10.751359', 'step': 5144, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:10.794297', 'step': 5144, 'epoch': 1} {'type': 'loss', 'content': 0.12093738466501236, 'timestamp': '2025-10-01 04:19:10.804164', 'step': 5145, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:10.837494', 'step': 5145, 'epoch': 1} {'type': 'loss', 'content': 0.1201263964176178, 'timestamp': '2025-10-01 04:19:10.840241', 'step': 5146, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:10.872581', 'step': 5146, 'epoch': 1} {'type': 'loss', 'content': 0.11680464446544647, 'timestamp': '2025-10-01 04:19:10.876142', 'step': 5147, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:10.908289', 'step': 5147, 'epoch': 1} {'type': 'loss', 'content': 0.1918436586856842, 'timestamp': '2025-10-01 04:19:10.933519', 'step': 5148, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:10.963608', 'step': 5148, 'epoch': 1} {'type': 'loss', 'content': 0.04194051772356033, 'timestamp': '2025-10-01 04:19:10.965678', 'step': 5149, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:11.002459', 'step': 5149, 'epoch': 1} {'type': 'loss', 'content': 0.21940211951732635, 'timestamp': '2025-10-01 04:19:11.004455', 'step': 5150, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:19:11.040660', 'step': 5150, 'epoch': 1} {'type': 'loss', 'content': 0.07693655043840408, 'timestamp': '2025-10-01 04:19:11.042838', 'step': 5151, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:19:11.075621', 'step': 5151, 'epoch': 1} {'type': 'loss', 'content': 0.14327673614025116, 'timestamp': '2025-10-01 04:19:11.099108', 'step': 5152, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:19:11.130078', 'step': 5152, 'epoch': 1} {'type': 'loss', 'content': 0.1662241667509079, 'timestamp': '2025-10-01 04:19:11.132306', 'step': 5153, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:11.164851', 'step': 5153, 'epoch': 1} {'type': 'loss', 'content': 0.1490423083305359, 'timestamp': '2025-10-01 04:19:11.166862', 'step': 5154, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:19:11.213406', 'step': 5154, 'epoch': 1} {'type': 'loss', 'content': 0.142302006483078, 'timestamp': '2025-10-01 04:19:11.215666', 'step': 5155, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:11.247082', 'step': 5155, 'epoch': 1} {'type': 'loss', 'content': 0.11522051692008972, 'timestamp': '2025-10-01 04:19:11.270573', 'step': 5156, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:11.306497', 'step': 5156, 'epoch': 1} {'type': 'loss', 'content': 0.12411678582429886, 'timestamp': '2025-10-01 04:19:11.309186', 'step': 5157, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:11.339911', 'step': 5157, 'epoch': 1} {'type': 'loss', 'content': 0.14841878414154053, 'timestamp': '2025-10-01 04:19:11.342246', 'step': 5158, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:11.375557', 'step': 5158, 'epoch': 1} {'type': 'loss', 'content': 0.1428689807653427, 'timestamp': '2025-10-01 04:19:11.378007', 'step': 5159, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:11.408328', 'step': 5159, 'epoch': 1} {'type': 'loss', 'content': 0.15181094408035278, 'timestamp': '2025-10-01 04:19:11.432100', 'step': 5160, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:11.463322', 'step': 5160, 'epoch': 1} {'type': 'loss', 'content': 0.11900001764297485, 'timestamp': '2025-10-01 04:19:11.465439', 'step': 5161, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:11.498524', 'step': 5161, 'epoch': 1} {'type': 'loss', 'content': 0.1842820644378662, 'timestamp': '2025-10-01 04:19:11.501072', 'step': 5162, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:19:11.532494', 'step': 5162, 'epoch': 1} {'type': 'loss', 'content': 0.23235394060611725, 'timestamp': '2025-10-01 04:19:11.535002', 'step': 5163, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:11.566309', 'step': 5163, 'epoch': 1} {'type': 'loss', 'content': 0.17414213716983795, 'timestamp': '2025-10-01 04:19:11.589732', 'step': 5164, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:19:11.620148', 'step': 5164, 'epoch': 1} {'type': 'loss', 'content': 0.17139600217342377, 'timestamp': '2025-10-01 04:19:11.622208', 'step': 5165, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:11.653593', 'step': 5165, 'epoch': 1} {'type': 'loss', 'content': 0.10973648726940155, 'timestamp': '2025-10-01 04:19:11.655639', 'step': 5166, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:11.694377', 'step': 5166, 'epoch': 1} {'type': 'loss', 'content': 0.1967139095067978, 'timestamp': '2025-10-01 04:19:11.696411', 'step': 5167, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:11.735682', 'step': 5167, 'epoch': 1} {'type': 'loss', 'content': 0.10249185562133789, 'timestamp': '2025-10-01 04:19:11.759105', 'step': 5168, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:19:11.792038', 'step': 5168, 'epoch': 1} {'type': 'loss', 'content': 0.22847989201545715, 'timestamp': '2025-10-01 04:19:11.794359', 'step': 5169, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:11.826611', 'step': 5169, 'epoch': 1} {'type': 'loss', 'content': 0.18445590138435364, 'timestamp': '2025-10-01 04:19:11.828652', 'step': 5170, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:11.867634', 'step': 5170, 'epoch': 1} {'type': 'loss', 'content': 0.20090351998806, 'timestamp': '2025-10-01 04:19:11.873894', 'step': 5171, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:11.904372', 'step': 5171, 'epoch': 1} {'type': 'loss', 'content': 0.1729903221130371, 'timestamp': '2025-10-01 04:19:11.927830', 'step': 5172, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:11.959215', 'step': 5172, 'epoch': 1} {'type': 'loss', 'content': 0.11791300028562546, 'timestamp': '2025-10-01 04:19:11.962210', 'step': 5173, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:11.992798', 'step': 5173, 'epoch': 1} {'type': 'loss', 'content': 0.15130683779716492, 'timestamp': '2025-10-01 04:19:11.994879', 'step': 5174, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:19:12.025870', 'step': 5174, 'epoch': 1} {'type': 'loss', 'content': 0.20219042897224426, 'timestamp': '2025-10-01 04:19:12.028767', 'step': 5175, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:12.061328', 'step': 5175, 'epoch': 1} {'type': 'loss', 'content': 0.1066969707608223, 'timestamp': '2025-10-01 04:19:12.084917', 'step': 5176, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-10-01 04:19:12.116845', 'step': 5176, 'epoch': 1} {'type': 'loss', 'content': 0.1412758231163025, 'timestamp': '2025-10-01 04:19:12.119125', 'step': 5177, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:19:12.149917', 'step': 5177, 'epoch': 1} {'type': 'loss', 'content': 0.13324813544750214, 'timestamp': '2025-10-01 04:19:12.153486', 'step': 5178, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:12.187476', 'step': 5178, 'epoch': 1} {'type': 'loss', 'content': 0.15639084577560425, 'timestamp': '2025-10-01 04:19:12.189491', 'step': 5179, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:12.222689', 'step': 5179, 'epoch': 1} {'type': 'loss', 'content': 0.21893680095672607, 'timestamp': '2025-10-01 04:19:12.246227', 'step': 5180, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:12.280159', 'step': 5180, 'epoch': 1} {'type': 'loss', 'content': 0.16987062990665436, 'timestamp': '2025-10-01 04:19:12.282343', 'step': 5181, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:12.312955', 'step': 5181, 'epoch': 1} {'type': 'loss', 'content': 0.15856774151325226, 'timestamp': '2025-10-01 04:19:12.315109', 'step': 5182, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:12.346180', 'step': 5182, 'epoch': 1} {'type': 'loss', 'content': 0.15636008977890015, 'timestamp': '2025-10-01 04:19:12.348250', 'step': 5183, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:12.386361', 'step': 5183, 'epoch': 1} {'type': 'loss', 'content': 0.1955191045999527, 'timestamp': '2025-10-01 04:19:12.409939', 'step': 5184, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:12.443530', 'step': 5184, 'epoch': 1} {'type': 'loss', 'content': 0.22131399810314178, 'timestamp': '2025-10-01 04:19:12.445553', 'step': 5185, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:12.485853', 'step': 5185, 'epoch': 1} {'type': 'loss', 'content': 0.181756854057312, 'timestamp': '2025-10-01 04:19:12.487873', 'step': 5186, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:12.517832', 'step': 5186, 'epoch': 1} {'type': 'loss', 'content': 0.12783105671405792, 'timestamp': '2025-10-01 04:19:12.519851', 'step': 5187, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:12.551565', 'step': 5187, 'epoch': 1} {'type': 'loss', 'content': 0.2388649731874466, 'timestamp': '2025-10-01 04:19:12.575026', 'step': 5188, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:12.605577', 'step': 5188, 'epoch': 1} {'type': 'loss', 'content': 0.19510963559150696, 'timestamp': '2025-10-01 04:19:12.607778', 'step': 5189, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:12.638355', 'step': 5189, 'epoch': 1} {'type': 'loss', 'content': 0.14579786360263824, 'timestamp': '2025-10-01 04:19:12.640846', 'step': 5190, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:12.676056', 'step': 5190, 'epoch': 1} {'type': 'loss', 'content': 0.11599432677030563, 'timestamp': '2025-10-01 04:19:12.678574', 'step': 5191, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:12.712323', 'step': 5191, 'epoch': 1} {'type': 'loss', 'content': 0.20143932104110718, 'timestamp': '2025-10-01 04:19:12.736074', 'step': 5192, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:19:12.768627', 'step': 5192, 'epoch': 1} {'type': 'loss', 'content': 0.16145050525665283, 'timestamp': '2025-10-01 04:19:12.771332', 'step': 5193, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:12.805743', 'step': 5193, 'epoch': 1} {'type': 'loss', 'content': 0.17141929268836975, 'timestamp': '2025-10-01 04:19:12.808056', 'step': 5194, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:19:12.840413', 'step': 5194, 'epoch': 1} {'type': 'loss', 'content': 0.10994613915681839, 'timestamp': '2025-10-01 04:19:12.843456', 'step': 5195, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:12.877816', 'step': 5195, 'epoch': 1} {'type': 'loss', 'content': 0.141854926943779, 'timestamp': '2025-10-01 04:19:12.901955', 'step': 5196, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:19:12.933506', 'step': 5196, 'epoch': 1} {'type': 'loss', 'content': 0.27192965149879456, 'timestamp': '2025-10-01 04:19:12.935876', 'step': 5197, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:12.967668', 'step': 5197, 'epoch': 1} {'type': 'loss', 'content': 0.2247689813375473, 'timestamp': '2025-10-01 04:19:12.969956', 'step': 5198, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:13.001251', 'step': 5198, 'epoch': 1} {'type': 'loss', 'content': 0.13684122264385223, 'timestamp': '2025-10-01 04:19:13.003938', 'step': 5199, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:13.036087', 'step': 5199, 'epoch': 1} {'type': 'loss', 'content': 0.19861885905265808, 'timestamp': '2025-10-01 04:19:13.059897', 'step': 5200, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:13.091907', 'step': 5200, 'epoch': 1} {'type': 'loss', 'content': 0.25215011835098267, 'timestamp': '2025-10-01 04:19:13.093906', 'step': 5201, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:13.127417', 'step': 5201, 'epoch': 1} {'type': 'loss', 'content': 0.18469367921352386, 'timestamp': '2025-10-01 04:19:13.129483', 'step': 5202, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:19:13.163806', 'step': 5202, 'epoch': 1} {'type': 'loss', 'content': 0.14499609172344208, 'timestamp': '2025-10-01 04:19:13.168214', 'step': 5203, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:13.202081', 'step': 5203, 'epoch': 1} {'type': 'loss', 'content': 0.11117543280124664, 'timestamp': '2025-10-01 04:19:13.225818', 'step': 5204, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:13.258078', 'step': 5204, 'epoch': 1} {'type': 'loss', 'content': 0.11928265541791916, 'timestamp': '2025-10-01 04:19:13.260297', 'step': 5205, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:19:13.296189', 'step': 5205, 'epoch': 1} {'type': 'loss', 'content': 0.17709162831306458, 'timestamp': '2025-10-01 04:19:13.298689', 'step': 5206, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:13.330516', 'step': 5206, 'epoch': 1} {'type': 'loss', 'content': 0.142745241522789, 'timestamp': '2025-10-01 04:19:13.333091', 'step': 5207, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:19:13.365333', 'step': 5207, 'epoch': 1} {'type': 'loss', 'content': 0.17947836220264435, 'timestamp': '2025-10-01 04:19:13.396013', 'step': 5208, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:13.430086', 'step': 5208, 'epoch': 1} {'type': 'loss', 'content': 0.17897897958755493, 'timestamp': '2025-10-01 04:19:13.432201', 'step': 5209, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:13.465872', 'step': 5209, 'epoch': 1} {'type': 'loss', 'content': 0.18523259460926056, 'timestamp': '2025-10-01 04:19:13.467907', 'step': 5210, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:13.500131', 'step': 5210, 'epoch': 1} {'type': 'loss', 'content': 0.13843752443790436, 'timestamp': '2025-10-01 04:19:13.502535', 'step': 5211, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:13.535398', 'step': 5211, 'epoch': 1} {'type': 'loss', 'content': 0.17857548594474792, 'timestamp': '2025-10-01 04:19:13.558914', 'step': 5212, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:13.592326', 'step': 5212, 'epoch': 1} {'type': 'loss', 'content': 0.151603102684021, 'timestamp': '2025-10-01 04:19:13.594354', 'step': 5213, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:13.629248', 'step': 5213, 'epoch': 1} {'type': 'loss', 'content': 0.20738627016544342, 'timestamp': '2025-10-01 04:19:13.631559', 'step': 5214, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:13.664684', 'step': 5214, 'epoch': 1} {'type': 'loss', 'content': 0.11980918794870377, 'timestamp': '2025-10-01 04:19:13.668132', 'step': 5215, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:13.702674', 'step': 5215, 'epoch': 1} {'type': 'loss', 'content': 0.13345739245414734, 'timestamp': '2025-10-01 04:19:13.726352', 'step': 5216, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:13.758957', 'step': 5216, 'epoch': 1} {'type': 'loss', 'content': 0.16684262454509735, 'timestamp': '2025-10-01 04:19:13.761937', 'step': 5217, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:13.795233', 'step': 5217, 'epoch': 1} {'type': 'loss', 'content': 0.15626853704452515, 'timestamp': '2025-10-01 04:19:13.797654', 'step': 5218, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:13.832740', 'step': 5218, 'epoch': 1} {'type': 'loss', 'content': 0.1211407259106636, 'timestamp': '2025-10-01 04:19:13.835444', 'step': 5219, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:13.866395', 'step': 5219, 'epoch': 1} {'type': 'loss', 'content': 0.18415790796279907, 'timestamp': '2025-10-01 04:19:13.890663', 'step': 5220, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:19:13.922188', 'step': 5220, 'epoch': 1} {'type': 'loss', 'content': 0.23495043814182281, 'timestamp': '2025-10-01 04:19:13.925197', 'step': 5221, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:13.955702', 'step': 5221, 'epoch': 1} {'type': 'loss', 'content': 0.2758559286594391, 'timestamp': '2025-10-01 04:19:13.957936', 'step': 5222, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:13.988147', 'step': 5222, 'epoch': 1} {'type': 'loss', 'content': 0.09058692306280136, 'timestamp': '2025-10-01 04:19:13.990427', 'step': 5223, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:14.023270', 'step': 5223, 'epoch': 1} {'type': 'loss', 'content': 0.14302736520767212, 'timestamp': '2025-10-01 04:19:14.048456', 'step': 5224, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:14.083385', 'step': 5224, 'epoch': 1} {'type': 'loss', 'content': 0.11878081411123276, 'timestamp': '2025-10-01 04:19:14.085425', 'step': 5225, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:14.116866', 'step': 5225, 'epoch': 1} {'type': 'loss', 'content': 0.11030257493257523, 'timestamp': '2025-10-01 04:19:14.119108', 'step': 5226, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:14.156789', 'step': 5226, 'epoch': 1} {'type': 'loss', 'content': 0.16444416344165802, 'timestamp': '2025-10-01 04:19:14.158922', 'step': 5227, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:14.191183', 'step': 5227, 'epoch': 1} {'type': 'loss', 'content': 0.08624850958585739, 'timestamp': '2025-10-01 04:19:14.214796', 'step': 5228, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:14.247158', 'step': 5228, 'epoch': 1} {'type': 'loss', 'content': 0.18135616183280945, 'timestamp': '2025-10-01 04:19:14.249237', 'step': 5229, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:14.280284', 'step': 5229, 'epoch': 1} {'type': 'loss', 'content': 0.18462732434272766, 'timestamp': '2025-10-01 04:19:14.282409', 'step': 5230, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:14.313681', 'step': 5230, 'epoch': 1} {'type': 'loss', 'content': 0.07267412543296814, 'timestamp': '2025-10-01 04:19:14.315723', 'step': 5231, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:19:14.348394', 'step': 5231, 'epoch': 1} {'type': 'loss', 'content': 0.17883722484111786, 'timestamp': '2025-10-01 04:19:14.371944', 'step': 5232, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:14.404207', 'step': 5232, 'epoch': 1} {'type': 'loss', 'content': 0.1312398761510849, 'timestamp': '2025-10-01 04:19:14.406215', 'step': 5233, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:14.439747', 'step': 5233, 'epoch': 1} {'type': 'loss', 'content': 0.2665775418281555, 'timestamp': '2025-10-01 04:19:14.442155', 'step': 5234, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:14.477139', 'step': 5234, 'epoch': 1} {'type': 'loss', 'content': 0.06607085466384888, 'timestamp': '2025-10-01 04:19:14.479378', 'step': 5235, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:14.512772', 'step': 5235, 'epoch': 1} {'type': 'loss', 'content': 0.13593009114265442, 'timestamp': '2025-10-01 04:19:14.538172', 'step': 5236, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:14.571708', 'step': 5236, 'epoch': 1} {'type': 'loss', 'content': 0.11768923699855804, 'timestamp': '2025-10-01 04:19:14.573923', 'step': 5237, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:19:14.606123', 'step': 5237, 'epoch': 1} {'type': 'loss', 'content': 0.20094846189022064, 'timestamp': '2025-10-01 04:19:14.608146', 'step': 5238, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:14.638333', 'step': 5238, 'epoch': 1} {'type': 'loss', 'content': 0.12640780210494995, 'timestamp': '2025-10-01 04:19:14.640471', 'step': 5239, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:14.673885', 'step': 5239, 'epoch': 1} {'type': 'loss', 'content': 0.20722858607769012, 'timestamp': '2025-10-01 04:19:14.697575', 'step': 5240, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:14.728009', 'step': 5240, 'epoch': 1} {'type': 'loss', 'content': 0.1423366665840149, 'timestamp': '2025-10-01 04:19:14.730578', 'step': 5241, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:14.762700', 'step': 5241, 'epoch': 1} {'type': 'loss', 'content': 0.16516077518463135, 'timestamp': '2025-10-01 04:19:14.764786', 'step': 5242, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:14.796131', 'step': 5242, 'epoch': 1} {'type': 'loss', 'content': 0.12389181554317474, 'timestamp': '2025-10-01 04:19:14.798327', 'step': 5243, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:19:14.829172', 'step': 5243, 'epoch': 1} {'type': 'loss', 'content': 0.14418497681617737, 'timestamp': '2025-10-01 04:19:14.852625', 'step': 5244, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:14.883755', 'step': 5244, 'epoch': 1} {'type': 'loss', 'content': 0.0741196796298027, 'timestamp': '2025-10-01 04:19:14.885891', 'step': 5245, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:14.917562', 'step': 5245, 'epoch': 1} {'type': 'loss', 'content': 0.134407177567482, 'timestamp': '2025-10-01 04:19:14.919582', 'step': 5246, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:14.950388', 'step': 5246, 'epoch': 1} {'type': 'loss', 'content': 0.18313175439834595, 'timestamp': '2025-10-01 04:19:14.958742', 'step': 5247, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:19:14.990172', 'step': 5247, 'epoch': 1} {'type': 'loss', 'content': 0.1263391375541687, 'timestamp': '2025-10-01 04:19:15.013934', 'step': 5248, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:15.044538', 'step': 5248, 'epoch': 1} {'type': 'loss', 'content': 0.12691794335842133, 'timestamp': '2025-10-01 04:19:15.046711', 'step': 5249, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:15.078097', 'step': 5249, 'epoch': 1} {'type': 'loss', 'content': 0.1808571219444275, 'timestamp': '2025-10-01 04:19:15.080277', 'step': 5250, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:15.110799', 'step': 5250, 'epoch': 1} {'type': 'loss', 'content': 0.10114730894565582, 'timestamp': '2025-10-01 04:19:15.113842', 'step': 5251, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:15.156979', 'step': 5251, 'epoch': 1} {'type': 'loss', 'content': 0.14273232221603394, 'timestamp': '2025-10-01 04:19:15.181526', 'step': 5252, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:15.214800', 'step': 5252, 'epoch': 1} {'type': 'loss', 'content': 0.19028274714946747, 'timestamp': '2025-10-01 04:19:15.216785', 'step': 5253, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:15.255449', 'step': 5253, 'epoch': 1} {'type': 'loss', 'content': 0.19520719349384308, 'timestamp': '2025-10-01 04:19:15.257359', 'step': 5254, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:15.288836', 'step': 5254, 'epoch': 1} {'type': 'loss', 'content': 0.17853212356567383, 'timestamp': '2025-10-01 04:19:15.291547', 'step': 5255, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:15.325954', 'step': 5255, 'epoch': 1} {'type': 'loss', 'content': 0.1596396118402481, 'timestamp': '2025-10-01 04:19:15.349427', 'step': 5256, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:19:15.386896', 'step': 5256, 'epoch': 1} {'type': 'loss', 'content': 0.2873750627040863, 'timestamp': '2025-10-01 04:19:15.389089', 'step': 5257, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:15.422857', 'step': 5257, 'epoch': 1} {'type': 'loss', 'content': 0.13865892589092255, 'timestamp': '2025-10-01 04:19:15.430525', 'step': 5258, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:15.461507', 'step': 5258, 'epoch': 1} {'type': 'loss', 'content': 0.05943543463945389, 'timestamp': '2025-10-01 04:19:15.473195', 'step': 5259, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:15.506348', 'step': 5259, 'epoch': 1} {'type': 'loss', 'content': 0.12113049626350403, 'timestamp': '2025-10-01 04:19:15.532636', 'step': 5260, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:19:15.563549', 'step': 5260, 'epoch': 1} {'type': 'loss', 'content': 0.10029975324869156, 'timestamp': '2025-10-01 04:19:15.565544', 'step': 5261, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:15.600057', 'step': 5261, 'epoch': 1} {'type': 'loss', 'content': 0.0803208127617836, 'timestamp': '2025-10-01 04:19:15.602285', 'step': 5262, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:15.639956', 'step': 5262, 'epoch': 1} {'type': 'loss', 'content': 0.07243379950523376, 'timestamp': '2025-10-01 04:19:15.642160', 'step': 5263, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:15.678129', 'step': 5263, 'epoch': 1} {'type': 'loss', 'content': 0.13603608310222626, 'timestamp': '2025-10-01 04:19:15.701797', 'step': 5264, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:15.733708', 'step': 5264, 'epoch': 1} {'type': 'loss', 'content': 0.2718609869480133, 'timestamp': '2025-10-01 04:19:15.739852', 'step': 5265, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:15.770210', 'step': 5265, 'epoch': 1} {'type': 'loss', 'content': 0.19979791343212128, 'timestamp': '2025-10-01 04:19:15.774484', 'step': 5266, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:19:15.805217', 'step': 5266, 'epoch': 1} {'type': 'loss', 'content': 0.1763768047094345, 'timestamp': '2025-10-01 04:19:15.807266', 'step': 5267, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:15.837819', 'step': 5267, 'epoch': 1} {'type': 'loss', 'content': 0.11443150788545609, 'timestamp': '2025-10-01 04:19:15.861510', 'step': 5268, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:19:15.895680', 'step': 5268, 'epoch': 1} {'type': 'loss', 'content': 0.17806243896484375, 'timestamp': '2025-10-01 04:19:15.897667', 'step': 5269, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:15.928846', 'step': 5269, 'epoch': 1} {'type': 'loss', 'content': 0.12773993611335754, 'timestamp': '2025-10-01 04:19:15.931416', 'step': 5270, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:15.963155', 'step': 5270, 'epoch': 1} {'type': 'loss', 'content': 0.2084575742483139, 'timestamp': '2025-10-01 04:19:15.965540', 'step': 5271, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:16.013789', 'step': 5271, 'epoch': 1} {'type': 'loss', 'content': 0.10095579922199249, 'timestamp': '2025-10-01 04:19:16.038468', 'step': 5272, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:19:16.070175', 'step': 5272, 'epoch': 1} {'type': 'loss', 'content': 0.18329200148582458, 'timestamp': '2025-10-01 04:19:16.073130', 'step': 5273, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:16.104025', 'step': 5273, 'epoch': 1} {'type': 'loss', 'content': 0.24175836145877838, 'timestamp': '2025-10-01 04:19:16.106214', 'step': 5274, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:19:16.136372', 'step': 5274, 'epoch': 1} {'type': 'loss', 'content': 0.2421625256538391, 'timestamp': '2025-10-01 04:19:16.139224', 'step': 5275, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:16.168900', 'step': 5275, 'epoch': 1} {'type': 'loss', 'content': 0.13739782571792603, 'timestamp': '2025-10-01 04:19:16.193704', 'step': 5276, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:16.225686', 'step': 5276, 'epoch': 1} {'type': 'loss', 'content': 0.10530583560466766, 'timestamp': '2025-10-01 04:19:16.229605', 'step': 5277, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:16.262845', 'step': 5277, 'epoch': 1} {'type': 'loss', 'content': 0.146692156791687, 'timestamp': '2025-10-01 04:19:16.264915', 'step': 5278, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:16.295888', 'step': 5278, 'epoch': 1} {'type': 'loss', 'content': 0.30609726905822754, 'timestamp': '2025-10-01 04:19:16.298349', 'step': 5279, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:16.332341', 'step': 5279, 'epoch': 1} {'type': 'loss', 'content': 0.11942844092845917, 'timestamp': '2025-10-01 04:19:16.355709', 'step': 5280, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:16.391561', 'step': 5280, 'epoch': 1} {'type': 'loss', 'content': 0.18288366496562958, 'timestamp': '2025-10-01 04:19:16.393599', 'step': 5281, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:19:16.424921', 'step': 5281, 'epoch': 1} {'type': 'loss', 'content': 0.1838865876197815, 'timestamp': '2025-10-01 04:19:16.427915', 'step': 5282, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:19:16.462394', 'step': 5282, 'epoch': 1} {'type': 'loss', 'content': 0.13524049520492554, 'timestamp': '2025-10-01 04:19:16.464695', 'step': 5283, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:16.496043', 'step': 5283, 'epoch': 1} {'type': 'loss', 'content': 0.11816646158695221, 'timestamp': '2025-10-01 04:19:16.519631', 'step': 5284, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:16.551312', 'step': 5284, 'epoch': 1} {'type': 'loss', 'content': 0.187697634100914, 'timestamp': '2025-10-01 04:19:16.553370', 'step': 5285, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:19:16.584242', 'step': 5285, 'epoch': 1} {'type': 'loss', 'content': 0.27040091156959534, 'timestamp': '2025-10-01 04:19:16.586463', 'step': 5286, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:19:16.616894', 'step': 5286, 'epoch': 1} {'type': 'loss', 'content': 0.12957173585891724, 'timestamp': '2025-10-01 04:19:16.619216', 'step': 5287, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:16.650848', 'step': 5287, 'epoch': 1} {'type': 'loss', 'content': 0.11429493874311447, 'timestamp': '2025-10-01 04:19:16.674447', 'step': 5288, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:16.707225', 'step': 5288, 'epoch': 1} {'type': 'loss', 'content': 0.2453320026397705, 'timestamp': '2025-10-01 04:19:16.709306', 'step': 5289, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:16.740254', 'step': 5289, 'epoch': 1} {'type': 'loss', 'content': 0.11793512105941772, 'timestamp': '2025-10-01 04:19:16.742301', 'step': 5290, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:19:16.772804', 'step': 5290, 'epoch': 1} {'type': 'loss', 'content': 0.23057854175567627, 'timestamp': '2025-10-01 04:19:16.774850', 'step': 5291, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:16.806569', 'step': 5291, 'epoch': 1} {'type': 'loss', 'content': 0.17958258092403412, 'timestamp': '2025-10-01 04:19:16.832964', 'step': 5292, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:16.864465', 'step': 5292, 'epoch': 1} {'type': 'loss', 'content': 0.22317776083946228, 'timestamp': '2025-10-01 04:19:16.866485', 'step': 5293, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:16.899197', 'step': 5293, 'epoch': 1} {'type': 'loss', 'content': 0.14328035712242126, 'timestamp': '2025-10-01 04:19:16.901611', 'step': 5294, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:16.933090', 'step': 5294, 'epoch': 1} {'type': 'loss', 'content': 0.10453781485557556, 'timestamp': '2025-10-01 04:19:16.934968', 'step': 5295, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:19:16.965623', 'step': 5295, 'epoch': 1} {'type': 'loss', 'content': 0.11655057966709137, 'timestamp': '2025-10-01 04:19:16.989145', 'step': 5296, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:17.020810', 'step': 5296, 'epoch': 1} {'type': 'loss', 'content': 0.1888599544763565, 'timestamp': '2025-10-01 04:19:17.022888', 'step': 5297, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:17.054885', 'step': 5297, 'epoch': 1} {'type': 'loss', 'content': 0.17326180636882782, 'timestamp': '2025-10-01 04:19:17.056769', 'step': 5298, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:17.088796', 'step': 5298, 'epoch': 1} {'type': 'loss', 'content': 0.16823157668113708, 'timestamp': '2025-10-01 04:19:17.090920', 'step': 5299, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:17.121857', 'step': 5299, 'epoch': 1} {'type': 'loss', 'content': 0.1368808001279831, 'timestamp': '2025-10-01 04:19:17.145556', 'step': 5300, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:17.177545', 'step': 5300, 'epoch': 1} {'type': 'loss', 'content': 0.1334071308374405, 'timestamp': '2025-10-01 04:19:17.179786', 'step': 5301, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:19:17.214043', 'step': 5301, 'epoch': 1} {'type': 'loss', 'content': 0.18027783930301666, 'timestamp': '2025-10-01 04:19:17.216519', 'step': 5302, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:17.247747', 'step': 5302, 'epoch': 1} {'type': 'loss', 'content': 0.23712953925132751, 'timestamp': '2025-10-01 04:19:17.249776', 'step': 5303, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:17.286550', 'step': 5303, 'epoch': 1} {'type': 'loss', 'content': 0.1328858733177185, 'timestamp': '2025-10-01 04:19:17.310107', 'step': 5304, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:17.342918', 'step': 5304, 'epoch': 1} {'type': 'loss', 'content': 0.0950881689786911, 'timestamp': '2025-10-01 04:19:17.344990', 'step': 5305, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:17.378031', 'step': 5305, 'epoch': 1} {'type': 'loss', 'content': 0.17911772429943085, 'timestamp': '2025-10-01 04:19:17.380028', 'step': 5306, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:17.415341', 'step': 5306, 'epoch': 1} {'type': 'loss', 'content': 0.13688725233078003, 'timestamp': '2025-10-01 04:19:17.417497', 'step': 5307, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:17.449746', 'step': 5307, 'epoch': 1} {'type': 'loss', 'content': 0.15097564458847046, 'timestamp': '2025-10-01 04:19:17.473771', 'step': 5308, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:17.506746', 'step': 5308, 'epoch': 1} {'type': 'loss', 'content': 0.1543818712234497, 'timestamp': '2025-10-01 04:19:17.508807', 'step': 5309, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:19:17.539611', 'step': 5309, 'epoch': 1} {'type': 'loss', 'content': 0.15410922467708588, 'timestamp': '2025-10-01 04:19:17.541981', 'step': 5310, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:17.573794', 'step': 5310, 'epoch': 1} {'type': 'loss', 'content': 0.10200849920511246, 'timestamp': '2025-10-01 04:19:17.581229', 'step': 5311, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:17.613001', 'step': 5311, 'epoch': 1} {'type': 'loss', 'content': 0.1268848180770874, 'timestamp': '2025-10-01 04:19:17.637067', 'step': 5312, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:17.672947', 'step': 5312, 'epoch': 1} {'type': 'loss', 'content': 0.20117081701755524, 'timestamp': '2025-10-01 04:19:17.674909', 'step': 5313, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:17.711972', 'step': 5313, 'epoch': 1} {'type': 'loss', 'content': 0.28202536702156067, 'timestamp': '2025-10-01 04:19:17.714105', 'step': 5314, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:17.745504', 'step': 5314, 'epoch': 1} {'type': 'loss', 'content': 0.11959439516067505, 'timestamp': '2025-10-01 04:19:17.747448', 'step': 5315, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:17.786173', 'step': 5315, 'epoch': 1} {'type': 'loss', 'content': 0.19069406390190125, 'timestamp': '2025-10-01 04:19:17.809755', 'step': 5316, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:19:17.841147', 'step': 5316, 'epoch': 1} {'type': 'loss', 'content': 0.09760208427906036, 'timestamp': '2025-10-01 04:19:17.843212', 'step': 5317, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:17.873483', 'step': 5317, 'epoch': 1} {'type': 'loss', 'content': 0.12478329241275787, 'timestamp': '2025-10-01 04:19:17.875560', 'step': 5318, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:17.908810', 'step': 5318, 'epoch': 1} {'type': 'loss', 'content': 0.14385707676410675, 'timestamp': '2025-10-01 04:19:17.910832', 'step': 5319, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:17.942209', 'step': 5319, 'epoch': 1} {'type': 'loss', 'content': 0.16497750580310822, 'timestamp': '2025-10-01 04:19:17.965692', 'step': 5320, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:17.997908', 'step': 5320, 'epoch': 1} {'type': 'loss', 'content': 0.12247848510742188, 'timestamp': '2025-10-01 04:19:18.000123', 'step': 5321, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:19:18.031799', 'step': 5321, 'epoch': 1} {'type': 'loss', 'content': 0.10220996290445328, 'timestamp': '2025-10-01 04:19:18.034195', 'step': 5322, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:18.065585', 'step': 5322, 'epoch': 1} {'type': 'loss', 'content': 0.1691933125257492, 'timestamp': '2025-10-01 04:19:18.067850', 'step': 5323, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:18.100906', 'step': 5323, 'epoch': 1} {'type': 'loss', 'content': 0.2507690191268921, 'timestamp': '2025-10-01 04:19:18.124689', 'step': 5324, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:18.156176', 'step': 5324, 'epoch': 1} {'type': 'loss', 'content': 0.1797112375497818, 'timestamp': '2025-10-01 04:19:18.158296', 'step': 5325, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:18.194317', 'step': 5325, 'epoch': 1} {'type': 'loss', 'content': 0.12369385361671448, 'timestamp': '2025-10-01 04:19:18.196888', 'step': 5326, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:18.228540', 'step': 5326, 'epoch': 1} {'type': 'loss', 'content': 0.11985991150140762, 'timestamp': '2025-10-01 04:19:18.230920', 'step': 5327, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:18.263532', 'step': 5327, 'epoch': 1} {'type': 'loss', 'content': 0.13105933368206024, 'timestamp': '2025-10-01 04:19:18.287977', 'step': 5328, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:18.321045', 'step': 5328, 'epoch': 1} {'type': 'loss', 'content': 0.20159870386123657, 'timestamp': '2025-10-01 04:19:18.326670', 'step': 5329, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-10-01 04:19:18.358869', 'step': 5329, 'epoch': 1} {'type': 'loss', 'content': 0.1544753760099411, 'timestamp': '2025-10-01 04:19:18.363727', 'step': 5330, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:18.395217', 'step': 5330, 'epoch': 1} {'type': 'loss', 'content': 0.16277676820755005, 'timestamp': '2025-10-01 04:19:18.397383', 'step': 5331, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:18.428020', 'step': 5331, 'epoch': 1} {'type': 'loss', 'content': 0.17748212814331055, 'timestamp': '2025-10-01 04:19:18.451400', 'step': 5332, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:18.483807', 'step': 5332, 'epoch': 1} {'type': 'loss', 'content': 0.18508191406726837, 'timestamp': '2025-10-01 04:19:18.488460', 'step': 5333, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:18.519081', 'step': 5333, 'epoch': 1} {'type': 'loss', 'content': 0.07375378161668777, 'timestamp': '2025-10-01 04:19:18.521162', 'step': 5334, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:18.552368', 'step': 5334, 'epoch': 1} {'type': 'loss', 'content': 0.2233944982290268, 'timestamp': '2025-10-01 04:19:18.554551', 'step': 5335, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:18.584968', 'step': 5335, 'epoch': 1} {'type': 'loss', 'content': 0.16699618101119995, 'timestamp': '2025-10-01 04:19:18.608510', 'step': 5336, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:18.642205', 'step': 5336, 'epoch': 1} {'type': 'loss', 'content': 0.1538350135087967, 'timestamp': '2025-10-01 04:19:18.644510', 'step': 5337, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:18.675002', 'step': 5337, 'epoch': 1} {'type': 'loss', 'content': 0.22397209703922272, 'timestamp': '2025-10-01 04:19:18.677000', 'step': 5338, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:19:18.707578', 'step': 5338, 'epoch': 1} {'type': 'loss', 'content': 0.15759986639022827, 'timestamp': '2025-10-01 04:19:18.710051', 'step': 5339, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:18.740225', 'step': 5339, 'epoch': 1} {'type': 'loss', 'content': 0.15453876554965973, 'timestamp': '2025-10-01 04:19:18.763630', 'step': 5340, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:18.796222', 'step': 5340, 'epoch': 1} {'type': 'loss', 'content': 0.07323623448610306, 'timestamp': '2025-10-01 04:19:18.798224', 'step': 5341, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:18.828783', 'step': 5341, 'epoch': 1} {'type': 'loss', 'content': 0.056883834302425385, 'timestamp': '2025-10-01 04:19:18.830965', 'step': 5342, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:18.870545', 'step': 5342, 'epoch': 1} {'type': 'loss', 'content': 0.1618073433637619, 'timestamp': '2025-10-01 04:19:18.873213', 'step': 5343, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:18.903529', 'step': 5343, 'epoch': 1} {'type': 'loss', 'content': 0.16512665152549744, 'timestamp': '2025-10-01 04:19:18.928753', 'step': 5344, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:18.958360', 'step': 5344, 'epoch': 1} {'type': 'loss', 'content': 0.18606001138687134, 'timestamp': '2025-10-01 04:19:18.960421', 'step': 5345, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:18.991689', 'step': 5345, 'epoch': 1} {'type': 'loss', 'content': 0.21413208544254303, 'timestamp': '2025-10-01 04:19:18.994049', 'step': 5346, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:19.025087', 'step': 5346, 'epoch': 1} {'type': 'loss', 'content': 0.18546870350837708, 'timestamp': '2025-10-01 04:19:19.027082', 'step': 5347, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:19.057834', 'step': 5347, 'epoch': 1} {'type': 'loss', 'content': 0.23334279656410217, 'timestamp': '2025-10-01 04:19:19.081352', 'step': 5348, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:19.114048', 'step': 5348, 'epoch': 1} {'type': 'loss', 'content': 0.1567569226026535, 'timestamp': '2025-10-01 04:19:19.116093', 'step': 5349, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:19:19.147477', 'step': 5349, 'epoch': 1} {'type': 'loss', 'content': 0.20510026812553406, 'timestamp': '2025-10-01 04:19:19.149747', 'step': 5350, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:19.181051', 'step': 5350, 'epoch': 1} {'type': 'loss', 'content': 0.20283673703670502, 'timestamp': '2025-10-01 04:19:19.183245', 'step': 5351, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:19.215045', 'step': 5351, 'epoch': 1} {'type': 'loss', 'content': 0.21378028392791748, 'timestamp': '2025-10-01 04:19:19.284709', 'step': 5352, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:19.322126', 'step': 5352, 'epoch': 1} {'type': 'loss', 'content': 0.10372354090213776, 'timestamp': '2025-10-01 04:19:19.324823', 'step': 5353, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:19:19.354974', 'step': 5353, 'epoch': 1} {'type': 'loss', 'content': 0.11293627321720123, 'timestamp': '2025-10-01 04:19:19.357739', 'step': 5354, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:19.395112', 'step': 5354, 'epoch': 1} {'type': 'loss', 'content': 0.22113217413425446, 'timestamp': '2025-10-01 04:19:19.397707', 'step': 5355, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:19.429592', 'step': 5355, 'epoch': 1} {'type': 'loss', 'content': 0.17941145598888397, 'timestamp': '2025-10-01 04:19:19.453305', 'step': 5356, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:19.483453', 'step': 5356, 'epoch': 1} {'type': 'loss', 'content': 0.14122939109802246, 'timestamp': '2025-10-01 04:19:19.486166', 'step': 5357, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:19:19.522293', 'step': 5357, 'epoch': 1} {'type': 'loss', 'content': 0.19202344119548798, 'timestamp': '2025-10-01 04:19:19.524630', 'step': 5358, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:19.555698', 'step': 5358, 'epoch': 1} {'type': 'loss', 'content': 0.10985502600669861, 'timestamp': '2025-10-01 04:19:19.557843', 'step': 5359, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:19.588475', 'step': 5359, 'epoch': 1} {'type': 'loss', 'content': 0.15975499153137207, 'timestamp': '2025-10-01 04:19:19.612062', 'step': 5360, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:19.642569', 'step': 5360, 'epoch': 1} {'type': 'loss', 'content': 0.14640070497989655, 'timestamp': '2025-10-01 04:19:19.644561', 'step': 5361, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:19.676591', 'step': 5361, 'epoch': 1} {'type': 'loss', 'content': 0.12215614318847656, 'timestamp': '2025-10-01 04:19:19.678532', 'step': 5362, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:19.710314', 'step': 5362, 'epoch': 1} {'type': 'loss', 'content': 0.15194520354270935, 'timestamp': '2025-10-01 04:19:19.712312', 'step': 5363, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:19.743793', 'step': 5363, 'epoch': 1} {'type': 'loss', 'content': 0.1148105338215828, 'timestamp': '2025-10-01 04:19:19.767336', 'step': 5364, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:19:19.797773', 'step': 5364, 'epoch': 1} {'type': 'loss', 'content': 0.11741219460964203, 'timestamp': '2025-10-01 04:19:19.800129', 'step': 5365, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:19.830536', 'step': 5365, 'epoch': 1} {'type': 'loss', 'content': 0.1571534126996994, 'timestamp': '2025-10-01 04:19:19.832770', 'step': 5366, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:19.863525', 'step': 5366, 'epoch': 1} {'type': 'loss', 'content': 0.08523818105459213, 'timestamp': '2025-10-01 04:19:19.865452', 'step': 5367, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:19.896623', 'step': 5367, 'epoch': 1} {'type': 'loss', 'content': 0.10395605862140656, 'timestamp': '2025-10-01 04:19:19.920133', 'step': 5368, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:19.950818', 'step': 5368, 'epoch': 1} {'type': 'loss', 'content': 0.19868166744709015, 'timestamp': '2025-10-01 04:19:19.953698', 'step': 5369, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:19.984422', 'step': 5369, 'epoch': 1} {'type': 'loss', 'content': 0.11717832833528519, 'timestamp': '2025-10-01 04:19:19.986619', 'step': 5370, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:19:20.016708', 'step': 5370, 'epoch': 1} {'type': 'loss', 'content': 0.1493692547082901, 'timestamp': '2025-10-01 04:19:20.019016', 'step': 5371, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:20.048798', 'step': 5371, 'epoch': 1} {'type': 'loss', 'content': 0.1726130247116089, 'timestamp': '2025-10-01 04:19:20.072156', 'step': 5372, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:19:20.103176', 'step': 5372, 'epoch': 1} {'type': 'loss', 'content': 0.13640090823173523, 'timestamp': '2025-10-01 04:19:20.112773', 'step': 5373, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:20.143684', 'step': 5373, 'epoch': 1} {'type': 'loss', 'content': 0.1270846426486969, 'timestamp': '2025-10-01 04:19:20.146371', 'step': 5374, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:20.177358', 'step': 5374, 'epoch': 1} {'type': 'loss', 'content': 0.0867103561758995, 'timestamp': '2025-10-01 04:19:20.179780', 'step': 5375, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:20.210237', 'step': 5375, 'epoch': 1} {'type': 'loss', 'content': 0.10855691134929657, 'timestamp': '2025-10-01 04:19:20.233739', 'step': 5376, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:20.265086', 'step': 5376, 'epoch': 1} {'type': 'loss', 'content': 0.12187159061431885, 'timestamp': '2025-10-01 04:19:20.267463', 'step': 5377, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:20.297709', 'step': 5377, 'epoch': 1} {'type': 'loss', 'content': 0.11241880059242249, 'timestamp': '2025-10-01 04:19:20.299834', 'step': 5378, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:20.330373', 'step': 5378, 'epoch': 1} {'type': 'loss', 'content': 0.1974695771932602, 'timestamp': '2025-10-01 04:19:20.334354', 'step': 5379, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:20.366418', 'step': 5379, 'epoch': 1} {'type': 'loss', 'content': 0.17609506845474243, 'timestamp': '2025-10-01 04:19:20.390146', 'step': 5380, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:19:20.422117', 'step': 5380, 'epoch': 1} {'type': 'loss', 'content': 0.13361693918704987, 'timestamp': '2025-10-01 04:19:20.424305', 'step': 5381, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:20.458854', 'step': 5381, 'epoch': 1} {'type': 'loss', 'content': 0.1413542628288269, 'timestamp': '2025-10-01 04:19:20.460891', 'step': 5382, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:20.493032', 'step': 5382, 'epoch': 1} {'type': 'loss', 'content': 0.09423977136611938, 'timestamp': '2025-10-01 04:19:20.497373', 'step': 5383, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:20.534113', 'step': 5383, 'epoch': 1} {'type': 'loss', 'content': 0.14365562796592712, 'timestamp': '2025-10-01 04:19:20.557466', 'step': 5384, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:20.588133', 'step': 5384, 'epoch': 1} {'type': 'loss', 'content': 0.12252943962812424, 'timestamp': '2025-10-01 04:19:20.590326', 'step': 5385, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:20.621687', 'step': 5385, 'epoch': 1} {'type': 'loss', 'content': 0.09522849321365356, 'timestamp': '2025-10-01 04:19:20.623872', 'step': 5386, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:20.653991', 'step': 5386, 'epoch': 1} {'type': 'loss', 'content': 0.16272246837615967, 'timestamp': '2025-10-01 04:19:20.656066', 'step': 5387, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:19:20.697314', 'step': 5387, 'epoch': 1} {'type': 'loss', 'content': 0.14806944131851196, 'timestamp': '2025-10-01 04:19:20.720962', 'step': 5388, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:20.753532', 'step': 5388, 'epoch': 1} {'type': 'loss', 'content': 0.11180365830659866, 'timestamp': '2025-10-01 04:19:20.755576', 'step': 5389, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:20.788626', 'step': 5389, 'epoch': 1} {'type': 'loss', 'content': 0.19595685601234436, 'timestamp': '2025-10-01 04:19:20.792106', 'step': 5390, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:20.822909', 'step': 5390, 'epoch': 1} {'type': 'loss', 'content': 0.1731477528810501, 'timestamp': '2025-10-01 04:19:20.825210', 'step': 5391, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:20.856460', 'step': 5391, 'epoch': 1} {'type': 'loss', 'content': 0.23167884349822998, 'timestamp': '2025-10-01 04:19:20.880348', 'step': 5392, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:20.911686', 'step': 5392, 'epoch': 1} {'type': 'loss', 'content': 0.07734079658985138, 'timestamp': '2025-10-01 04:19:20.913930', 'step': 5393, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:20.951162', 'step': 5393, 'epoch': 1} {'type': 'loss', 'content': 0.11004726588726044, 'timestamp': '2025-10-01 04:19:20.953605', 'step': 5394, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:20.985935', 'step': 5394, 'epoch': 1} {'type': 'loss', 'content': 0.20757906138896942, 'timestamp': '2025-10-01 04:19:20.988490', 'step': 5395, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:21.021439', 'step': 5395, 'epoch': 1} {'type': 'loss', 'content': 0.18615932762622833, 'timestamp': '2025-10-01 04:19:21.049412', 'step': 5396, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:21.079697', 'step': 5396, 'epoch': 1} {'type': 'loss', 'content': 0.11578197777271271, 'timestamp': '2025-10-01 04:19:21.081991', 'step': 5397, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:21.113882', 'step': 5397, 'epoch': 1} {'type': 'loss', 'content': 0.19061817228794098, 'timestamp': '2025-10-01 04:19:21.117083', 'step': 5398, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:21.149219', 'step': 5398, 'epoch': 1} {'type': 'loss', 'content': 0.20362722873687744, 'timestamp': '2025-10-01 04:19:21.157052', 'step': 5399, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:21.188886', 'step': 5399, 'epoch': 1} {'type': 'loss', 'content': 0.17743657529354095, 'timestamp': '2025-10-01 04:19:21.212717', 'step': 5400, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:21.243431', 'step': 5400, 'epoch': 1} {'type': 'loss', 'content': 0.1190982535481453, 'timestamp': '2025-10-01 04:19:21.251879', 'step': 5401, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:21.281823', 'step': 5401, 'epoch': 1} {'type': 'loss', 'content': 0.14221349358558655, 'timestamp': '2025-10-01 04:19:21.283951', 'step': 5402, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:19:21.315890', 'step': 5402, 'epoch': 1} {'type': 'loss', 'content': 0.1381918489933014, 'timestamp': '2025-10-01 04:19:21.318452', 'step': 5403, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:21.351876', 'step': 5403, 'epoch': 1} {'type': 'loss', 'content': 0.11408517509698868, 'timestamp': '2025-10-01 04:19:21.375406', 'step': 5404, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:21.408193', 'step': 5404, 'epoch': 1} {'type': 'loss', 'content': 0.11855088919401169, 'timestamp': '2025-10-01 04:19:21.410429', 'step': 5405, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:21.441205', 'step': 5405, 'epoch': 1} {'type': 'loss', 'content': 0.20474861562252045, 'timestamp': '2025-10-01 04:19:21.443125', 'step': 5406, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:21.474512', 'step': 5406, 'epoch': 1} {'type': 'loss', 'content': 0.16419705748558044, 'timestamp': '2025-10-01 04:19:21.476780', 'step': 5407, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:21.515688', 'step': 5407, 'epoch': 1} {'type': 'loss', 'content': 0.12862356007099152, 'timestamp': '2025-10-01 04:19:21.539515', 'step': 5408, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:21.569273', 'step': 5408, 'epoch': 1} {'type': 'loss', 'content': 0.07190003991127014, 'timestamp': '2025-10-01 04:19:21.571556', 'step': 5409, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:21.603364', 'step': 5409, 'epoch': 1} {'type': 'loss', 'content': 0.12230449914932251, 'timestamp': '2025-10-01 04:19:21.605905', 'step': 5410, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:21.637725', 'step': 5410, 'epoch': 1} {'type': 'loss', 'content': 0.14594916999340057, 'timestamp': '2025-10-01 04:19:21.639881', 'step': 5411, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:19:21.671689', 'step': 5411, 'epoch': 1} {'type': 'loss', 'content': 0.22365882992744446, 'timestamp': '2025-10-01 04:19:21.695720', 'step': 5412, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:21.726423', 'step': 5412, 'epoch': 1} {'type': 'loss', 'content': 0.12194547057151794, 'timestamp': '2025-10-01 04:19:21.728367', 'step': 5413, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:21.761574', 'step': 5413, 'epoch': 1} {'type': 'loss', 'content': 0.09990228712558746, 'timestamp': '2025-10-01 04:19:21.763676', 'step': 5414, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:21.795344', 'step': 5414, 'epoch': 1} {'type': 'loss', 'content': 0.15018418431282043, 'timestamp': '2025-10-01 04:19:21.797278', 'step': 5415, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:21.831048', 'step': 5415, 'epoch': 1} {'type': 'loss', 'content': 0.23645587265491486, 'timestamp': '2025-10-01 04:19:21.854623', 'step': 5416, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:21.886188', 'step': 5416, 'epoch': 1} {'type': 'loss', 'content': 0.15566493570804596, 'timestamp': '2025-10-01 04:19:21.889307', 'step': 5417, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:19:21.920567', 'step': 5417, 'epoch': 1} {'type': 'loss', 'content': 0.19866977632045746, 'timestamp': '2025-10-01 04:19:21.922950', 'step': 5418, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:21.958411', 'step': 5418, 'epoch': 1} {'type': 'loss', 'content': 0.11020497977733612, 'timestamp': '2025-10-01 04:19:21.960904', 'step': 5419, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:21.997751', 'step': 5419, 'epoch': 1} {'type': 'loss', 'content': 0.12187589704990387, 'timestamp': '2025-10-01 04:19:22.021055', 'step': 5420, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:22.053000', 'step': 5420, 'epoch': 1} {'type': 'loss', 'content': 0.12398379296064377, 'timestamp': '2025-10-01 04:19:22.055190', 'step': 5421, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:22.091701', 'step': 5421, 'epoch': 1} {'type': 'loss', 'content': 0.2079927921295166, 'timestamp': '2025-10-01 04:19:22.093910', 'step': 5422, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:22.126590', 'step': 5422, 'epoch': 1} {'type': 'loss', 'content': 0.11730920523405075, 'timestamp': '2025-10-01 04:19:22.129345', 'step': 5423, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:22.160027', 'step': 5423, 'epoch': 1} {'type': 'loss', 'content': 0.17766623198986053, 'timestamp': '2025-10-01 04:19:22.183826', 'step': 5424, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:19:22.222070', 'step': 5424, 'epoch': 1} {'type': 'loss', 'content': 0.14634458720684052, 'timestamp': '2025-10-01 04:19:22.224381', 'step': 5425, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:22.256622', 'step': 5425, 'epoch': 1} {'type': 'loss', 'content': 0.10635486990213394, 'timestamp': '2025-10-01 04:19:22.259557', 'step': 5426, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:22.299460', 'step': 5426, 'epoch': 1} {'type': 'loss', 'content': 0.09209145605564117, 'timestamp': '2025-10-01 04:19:22.315943', 'step': 5427, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:22.353220', 'step': 5427, 'epoch': 1} {'type': 'loss', 'content': 0.21928362548351288, 'timestamp': '2025-10-01 04:19:22.376838', 'step': 5428, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:22.406395', 'step': 5428, 'epoch': 1} {'type': 'loss', 'content': 0.0884782075881958, 'timestamp': '2025-10-01 04:19:22.412920', 'step': 5429, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:22.444173', 'step': 5429, 'epoch': 1} {'type': 'loss', 'content': 0.09251821041107178, 'timestamp': '2025-10-01 04:19:22.446245', 'step': 5430, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:22.490361', 'step': 5430, 'epoch': 1} {'type': 'loss', 'content': 0.14130137860774994, 'timestamp': '2025-10-01 04:19:22.494905', 'step': 5431, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:22.526621', 'step': 5431, 'epoch': 1} {'type': 'loss', 'content': 0.10978322476148605, 'timestamp': '2025-10-01 04:19:22.550011', 'step': 5432, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:22.581197', 'step': 5432, 'epoch': 1} {'type': 'loss', 'content': 0.1358650028705597, 'timestamp': '2025-10-01 04:19:22.583417', 'step': 5433, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:22.613574', 'step': 5433, 'epoch': 1} {'type': 'loss', 'content': 0.20106402039527893, 'timestamp': '2025-10-01 04:19:22.615549', 'step': 5434, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:22.647154', 'step': 5434, 'epoch': 1} {'type': 'loss', 'content': 0.2820148169994354, 'timestamp': '2025-10-01 04:19:22.652829', 'step': 5435, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:22.695898', 'step': 5435, 'epoch': 1} {'type': 'loss', 'content': 0.12629082798957825, 'timestamp': '2025-10-01 04:19:22.719467', 'step': 5436, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:22.755009', 'step': 5436, 'epoch': 1} {'type': 'loss', 'content': 0.15748579800128937, 'timestamp': '2025-10-01 04:19:22.756999', 'step': 5437, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:22.789784', 'step': 5437, 'epoch': 1} {'type': 'loss', 'content': 0.16875122487545013, 'timestamp': '2025-10-01 04:19:22.792816', 'step': 5438, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:22.828116', 'step': 5438, 'epoch': 1} {'type': 'loss', 'content': 0.13718697428703308, 'timestamp': '2025-10-01 04:19:22.830173', 'step': 5439, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:22.861946', 'step': 5439, 'epoch': 1} {'type': 'loss', 'content': 0.14323745667934418, 'timestamp': '2025-10-01 04:19:22.887350', 'step': 5440, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:22.918564', 'step': 5440, 'epoch': 1} {'type': 'loss', 'content': 0.19996480643749237, 'timestamp': '2025-10-01 04:19:22.920514', 'step': 5441, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:22.955408', 'step': 5441, 'epoch': 1} {'type': 'loss', 'content': 0.13882353901863098, 'timestamp': '2025-10-01 04:19:22.957406', 'step': 5442, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:22.988169', 'step': 5442, 'epoch': 1} {'type': 'loss', 'content': 0.15421324968338013, 'timestamp': '2025-10-01 04:19:22.990138', 'step': 5443, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:23.026899', 'step': 5443, 'epoch': 1} {'type': 'loss', 'content': 0.11278249323368073, 'timestamp': '2025-10-01 04:19:23.050880', 'step': 5444, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:23.087179', 'step': 5444, 'epoch': 1} {'type': 'loss', 'content': 0.12172389030456543, 'timestamp': '2025-10-01 04:19:23.092460', 'step': 5445, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:23.134477', 'step': 5445, 'epoch': 1} {'type': 'loss', 'content': 0.1502743661403656, 'timestamp': '2025-10-01 04:19:23.136425', 'step': 5446, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:23.172013', 'step': 5446, 'epoch': 1} {'type': 'loss', 'content': 0.14891648292541504, 'timestamp': '2025-10-01 04:19:23.173947', 'step': 5447, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:23.205124', 'step': 5447, 'epoch': 1} {'type': 'loss', 'content': 0.11914102733135223, 'timestamp': '2025-10-01 04:19:23.228660', 'step': 5448, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:23.264137', 'step': 5448, 'epoch': 1} {'type': 'loss', 'content': 0.19793927669525146, 'timestamp': '2025-10-01 04:19:23.265794', 'step': 5449, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:19:23.296078', 'step': 5449, 'epoch': 1} {'type': 'loss', 'content': 0.12994569540023804, 'timestamp': '2025-10-01 04:19:23.298472', 'step': 5450, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:23.330143', 'step': 5450, 'epoch': 1} {'type': 'loss', 'content': 0.2577025592327118, 'timestamp': '2025-10-01 04:19:23.331980', 'step': 5451, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:23.362630', 'step': 5451, 'epoch': 1} {'type': 'loss', 'content': 0.12758506834506989, 'timestamp': '2025-10-01 04:19:23.385927', 'step': 5452, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:19:23.416107', 'step': 5452, 'epoch': 1} {'type': 'loss', 'content': 0.15519587695598602, 'timestamp': '2025-10-01 04:19:23.418087', 'step': 5453, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:23.449551', 'step': 5453, 'epoch': 1} {'type': 'loss', 'content': 0.2321476936340332, 'timestamp': '2025-10-01 04:19:23.454468', 'step': 5454, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:23.485469', 'step': 5454, 'epoch': 1} {'type': 'loss', 'content': 0.13860346376895905, 'timestamp': '2025-10-01 04:19:23.487402', 'step': 5455, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:23.518359', 'step': 5455, 'epoch': 1} {'type': 'loss', 'content': 0.09602243453264236, 'timestamp': '2025-10-01 04:19:23.541649', 'step': 5456, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:23.572260', 'step': 5456, 'epoch': 1} {'type': 'loss', 'content': 0.11110471934080124, 'timestamp': '2025-10-01 04:19:23.574443', 'step': 5457, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:23.604912', 'step': 5457, 'epoch': 1} {'type': 'loss', 'content': 0.2609177529811859, 'timestamp': '2025-10-01 04:19:23.606685', 'step': 5458, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:23.636941', 'step': 5458, 'epoch': 1} {'type': 'loss', 'content': 0.13247236609458923, 'timestamp': '2025-10-01 04:19:23.638989', 'step': 5459, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:23.676571', 'step': 5459, 'epoch': 1} {'type': 'loss', 'content': 0.18507139384746552, 'timestamp': '2025-10-01 04:19:23.700235', 'step': 5460, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:19:23.749530', 'step': 5460, 'epoch': 1} {'type': 'loss', 'content': 0.13400647044181824, 'timestamp': '2025-10-01 04:19:23.751140', 'step': 5461, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:23.781779', 'step': 5461, 'epoch': 1} {'type': 'loss', 'content': 0.11546101421117783, 'timestamp': '2025-10-01 04:19:23.783793', 'step': 5462, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:23.816525', 'step': 5462, 'epoch': 1} {'type': 'loss', 'content': 0.1299971044063568, 'timestamp': '2025-10-01 04:19:23.818890', 'step': 5463, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:23.852716', 'step': 5463, 'epoch': 1} {'type': 'loss', 'content': 0.07683079689741135, 'timestamp': '2025-10-01 04:19:23.876578', 'step': 5464, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:23.912276', 'step': 5464, 'epoch': 1} {'type': 'loss', 'content': 0.06813088059425354, 'timestamp': '2025-10-01 04:19:23.914327', 'step': 5465, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:23.949706', 'step': 5465, 'epoch': 1} {'type': 'loss', 'content': 0.17990970611572266, 'timestamp': '2025-10-01 04:19:23.953854', 'step': 5466, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:23.990217', 'step': 5466, 'epoch': 1} {'type': 'loss', 'content': 0.15732306241989136, 'timestamp': '2025-10-01 04:19:23.992498', 'step': 5467, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:24.036029', 'step': 5467, 'epoch': 1} {'type': 'loss', 'content': 0.21626418828964233, 'timestamp': '2025-10-01 04:19:24.059844', 'step': 5468, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:19:24.092182', 'step': 5468, 'epoch': 1} {'type': 'loss', 'content': 0.10573359578847885, 'timestamp': '2025-10-01 04:19:24.094166', 'step': 5469, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:24.126237', 'step': 5469, 'epoch': 1} {'type': 'loss', 'content': 0.2273884266614914, 'timestamp': '2025-10-01 04:19:24.128261', 'step': 5470, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:24.159090', 'step': 5470, 'epoch': 1} {'type': 'loss', 'content': 0.13055042922496796, 'timestamp': '2025-10-01 04:19:24.161339', 'step': 5471, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:24.201932', 'step': 5471, 'epoch': 1} {'type': 'loss', 'content': 0.14026501774787903, 'timestamp': '2025-10-01 04:19:24.225518', 'step': 5472, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:24.258481', 'step': 5472, 'epoch': 1} {'type': 'loss', 'content': 0.14343152940273285, 'timestamp': '2025-10-01 04:19:24.260369', 'step': 5473, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:24.293537', 'step': 5473, 'epoch': 1} {'type': 'loss', 'content': 0.18580113351345062, 'timestamp': '2025-10-01 04:19:24.295342', 'step': 5474, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:24.329383', 'step': 5474, 'epoch': 1} {'type': 'loss', 'content': 0.09773619472980499, 'timestamp': '2025-10-01 04:19:24.333419', 'step': 5475, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:24.369227', 'step': 5475, 'epoch': 1} {'type': 'loss', 'content': 0.10725860297679901, 'timestamp': '2025-10-01 04:19:24.392716', 'step': 5476, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:24.430996', 'step': 5476, 'epoch': 1} {'type': 'loss', 'content': 0.22420351207256317, 'timestamp': '2025-10-01 04:19:24.434149', 'step': 5477, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:24.467058', 'step': 5477, 'epoch': 1} {'type': 'loss', 'content': 0.2295295000076294, 'timestamp': '2025-10-01 04:19:24.469201', 'step': 5478, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:24.504326', 'step': 5478, 'epoch': 1} {'type': 'loss', 'content': 0.16141296923160553, 'timestamp': '2025-10-01 04:19:24.506753', 'step': 5479, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:24.537330', 'step': 5479, 'epoch': 1} {'type': 'loss', 'content': 0.1174289733171463, 'timestamp': '2025-10-01 04:19:24.560838', 'step': 5480, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:24.597041', 'step': 5480, 'epoch': 1} {'type': 'loss', 'content': 0.21540667116641998, 'timestamp': '2025-10-01 04:19:24.601645', 'step': 5481, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:24.642453', 'step': 5481, 'epoch': 1} {'type': 'loss', 'content': 0.11904986202716827, 'timestamp': '2025-10-01 04:19:24.644871', 'step': 5482, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:24.679260', 'step': 5482, 'epoch': 1} {'type': 'loss', 'content': 0.1343519240617752, 'timestamp': '2025-10-01 04:19:24.681572', 'step': 5483, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:24.715033', 'step': 5483, 'epoch': 1} {'type': 'loss', 'content': 0.18485459685325623, 'timestamp': '2025-10-01 04:19:24.738154', 'step': 5484, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:24.776988', 'step': 5484, 'epoch': 1} {'type': 'loss', 'content': 0.10748808830976486, 'timestamp': '2025-10-01 04:19:24.779813', 'step': 5485, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:24.815853', 'step': 5485, 'epoch': 1} {'type': 'loss', 'content': 0.2170320749282837, 'timestamp': '2025-10-01 04:19:24.818082', 'step': 5486, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:24.852806', 'step': 5486, 'epoch': 1} {'type': 'loss', 'content': 0.14443714916706085, 'timestamp': '2025-10-01 04:19:24.854776', 'step': 5487, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:24.885800', 'step': 5487, 'epoch': 1} {'type': 'loss', 'content': 0.1467014104127884, 'timestamp': '2025-10-01 04:19:24.909144', 'step': 5488, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:24.948009', 'step': 5488, 'epoch': 1} {'type': 'loss', 'content': 0.09712754935026169, 'timestamp': '2025-10-01 04:19:24.950534', 'step': 5489, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:24.983333', 'step': 5489, 'epoch': 1} {'type': 'loss', 'content': 0.16151843965053558, 'timestamp': '2025-10-01 04:19:24.990244', 'step': 5490, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:25.026842', 'step': 5490, 'epoch': 1} {'type': 'loss', 'content': 0.07450314611196518, 'timestamp': '2025-10-01 04:19:25.028765', 'step': 5491, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:25.060803', 'step': 5491, 'epoch': 1} {'type': 'loss', 'content': 0.10010227560997009, 'timestamp': '2025-10-01 04:19:25.088420', 'step': 5492, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:25.120503', 'step': 5492, 'epoch': 1} {'type': 'loss', 'content': 0.17364878952503204, 'timestamp': '2025-10-01 04:19:25.122431', 'step': 5493, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:19:25.153616', 'step': 5493, 'epoch': 1} {'type': 'loss', 'content': 0.12288176268339157, 'timestamp': '2025-10-01 04:19:25.157048', 'step': 5494, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:25.188571', 'step': 5494, 'epoch': 1} {'type': 'loss', 'content': 0.15977197885513306, 'timestamp': '2025-10-01 04:19:25.190288', 'step': 5495, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:25.221434', 'step': 5495, 'epoch': 1} {'type': 'loss', 'content': 0.23079527914524078, 'timestamp': '2025-10-01 04:19:25.245065', 'step': 5496, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:25.274839', 'step': 5496, 'epoch': 1} {'type': 'loss', 'content': 0.11983801424503326, 'timestamp': '2025-10-01 04:19:25.278199', 'step': 5497, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:25.310342', 'step': 5497, 'epoch': 1} {'type': 'loss', 'content': 0.10152792185544968, 'timestamp': '2025-10-01 04:19:25.312643', 'step': 5498, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:25.343669', 'step': 5498, 'epoch': 1} {'type': 'loss', 'content': 0.18445058166980743, 'timestamp': '2025-10-01 04:19:25.345746', 'step': 5499, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:19:25.375699', 'step': 5499, 'epoch': 1} {'type': 'loss', 'content': 0.18881262838840485, 'timestamp': '2025-10-01 04:19:25.399636', 'step': 5500, 'epoch': 1} {'type': 'info', 'content': 'Checkpoint saved at step 5500', 'timestamp': '2025-10-01 04:19:30.760154', 'step': 5500, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:30.801393', 'step': 5500, 'epoch': 1} {'type': 'loss', 'content': 0.1833772212266922, 'timestamp': '2025-10-01 04:19:30.803379', 'step': 5501, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:30.835646', 'step': 5501, 'epoch': 1} {'type': 'loss', 'content': 0.26639533042907715, 'timestamp': '2025-10-01 04:19:30.837750', 'step': 5502, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:30.868011', 'step': 5502, 'epoch': 1} {'type': 'loss', 'content': 0.02189168706536293, 'timestamp': '2025-10-01 04:19:30.869999', 'step': 5503, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:30.900900', 'step': 5503, 'epoch': 1} {'type': 'loss', 'content': 0.11954287439584732, 'timestamp': '2025-10-01 04:19:30.929667', 'step': 5504, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:19:30.962683', 'step': 5504, 'epoch': 1} {'type': 'loss', 'content': 0.12224850803613663, 'timestamp': '2025-10-01 04:19:30.964793', 'step': 5505, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:19:30.996122', 'step': 5505, 'epoch': 1} {'type': 'loss', 'content': 0.19280940294265747, 'timestamp': '2025-10-01 04:19:31.002129', 'step': 5506, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:31.034234', 'step': 5506, 'epoch': 1} {'type': 'loss', 'content': 0.19774174690246582, 'timestamp': '2025-10-01 04:19:31.036348', 'step': 5507, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:31.079070', 'step': 5507, 'epoch': 1} {'type': 'loss', 'content': 0.1540585309267044, 'timestamp': '2025-10-01 04:19:31.102554', 'step': 5508, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:31.133307', 'step': 5508, 'epoch': 1} {'type': 'loss', 'content': 0.15944921970367432, 'timestamp': '2025-10-01 04:19:31.135297', 'step': 5509, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:31.165645', 'step': 5509, 'epoch': 1} {'type': 'loss', 'content': 0.1756284534931183, 'timestamp': '2025-10-01 04:19:31.167907', 'step': 5510, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:31.198878', 'step': 5510, 'epoch': 1} {'type': 'loss', 'content': 0.17268498241901398, 'timestamp': '2025-10-01 04:19:31.201009', 'step': 5511, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:31.232647', 'step': 5511, 'epoch': 1} {'type': 'loss', 'content': 0.11517050117254257, 'timestamp': '2025-10-01 04:19:31.256069', 'step': 5512, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:19:31.285882', 'step': 5512, 'epoch': 1} {'type': 'loss', 'content': 0.20628391206264496, 'timestamp': '2025-10-01 04:19:31.287856', 'step': 5513, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:31.317950', 'step': 5513, 'epoch': 1} {'type': 'loss', 'content': 0.1348961442708969, 'timestamp': '2025-10-01 04:19:31.319917', 'step': 5514, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:19:31.351382', 'step': 5514, 'epoch': 1} {'type': 'loss', 'content': 0.14562556147575378, 'timestamp': '2025-10-01 04:19:31.353778', 'step': 5515, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:31.385598', 'step': 5515, 'epoch': 1} {'type': 'loss', 'content': 0.08149583637714386, 'timestamp': '2025-10-01 04:19:31.409694', 'step': 5516, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:19:31.442019', 'step': 5516, 'epoch': 1} {'type': 'loss', 'content': 0.12196428328752518, 'timestamp': '2025-10-01 04:19:31.444005', 'step': 5517, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:31.479362', 'step': 5517, 'epoch': 1} {'type': 'loss', 'content': 0.17187727987766266, 'timestamp': '2025-10-01 04:19:31.481316', 'step': 5518, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:31.512258', 'step': 5518, 'epoch': 1} {'type': 'loss', 'content': 0.10109921544790268, 'timestamp': '2025-10-01 04:19:31.514881', 'step': 5519, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:31.548935', 'step': 5519, 'epoch': 1} {'type': 'loss', 'content': 0.0920373722910881, 'timestamp': '2025-10-01 04:19:31.572585', 'step': 5520, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:31.603684', 'step': 5520, 'epoch': 1} {'type': 'loss', 'content': 0.16567616164684296, 'timestamp': '2025-10-01 04:19:31.605727', 'step': 5521, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:31.637300', 'step': 5521, 'epoch': 1} {'type': 'loss', 'content': 0.09218209236860275, 'timestamp': '2025-10-01 04:19:31.639369', 'step': 5522, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:19:31.670412', 'step': 5522, 'epoch': 1} {'type': 'loss', 'content': 0.14006678760051727, 'timestamp': '2025-10-01 04:19:31.672442', 'step': 5523, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:31.708598', 'step': 5523, 'epoch': 1} {'type': 'loss', 'content': 0.13499978184700012, 'timestamp': '2025-10-01 04:19:31.732079', 'step': 5524, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:31.762719', 'step': 5524, 'epoch': 1} {'type': 'loss', 'content': 0.0988205075263977, 'timestamp': '2025-10-01 04:19:31.764909', 'step': 5525, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-10-01 04:19:31.797952', 'step': 5525, 'epoch': 1} {'type': 'loss', 'content': 0.2031860053539276, 'timestamp': '2025-10-01 04:19:31.804926', 'step': 5526, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:19:31.835385', 'step': 5526, 'epoch': 1} {'type': 'loss', 'content': 0.14232628047466278, 'timestamp': '2025-10-01 04:19:31.839466', 'step': 5527, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:31.877624', 'step': 5527, 'epoch': 1} {'type': 'loss', 'content': 0.1710146963596344, 'timestamp': '2025-10-01 04:19:31.901218', 'step': 5528, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:31.933291', 'step': 5528, 'epoch': 1} {'type': 'loss', 'content': 0.08476193994283676, 'timestamp': '2025-10-01 04:19:31.935585', 'step': 5529, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:31.966334', 'step': 5529, 'epoch': 1} {'type': 'loss', 'content': 0.15080870687961578, 'timestamp': '2025-10-01 04:19:31.968355', 'step': 5530, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:32.001467', 'step': 5530, 'epoch': 1} {'type': 'loss', 'content': 0.13231346011161804, 'timestamp': '2025-10-01 04:19:32.003571', 'step': 5531, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:32.036686', 'step': 5531, 'epoch': 1} {'type': 'loss', 'content': 0.2179868370294571, 'timestamp': '2025-10-01 04:19:32.060138', 'step': 5532, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:32.090871', 'step': 5532, 'epoch': 1} {'type': 'loss', 'content': 0.1454334855079651, 'timestamp': '2025-10-01 04:19:32.096218', 'step': 5533, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:32.129612', 'step': 5533, 'epoch': 1} {'type': 'loss', 'content': 0.25805386900901794, 'timestamp': '2025-10-01 04:19:32.131755', 'step': 5534, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-10-01 04:19:32.162413', 'step': 5534, 'epoch': 1} {'type': 'loss', 'content': 0.16323933005332947, 'timestamp': '2025-10-01 04:19:32.166992', 'step': 5535, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:32.200837', 'step': 5535, 'epoch': 1} {'type': 'loss', 'content': 0.1610502451658249, 'timestamp': '2025-10-01 04:19:32.224328', 'step': 5536, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:32.258585', 'step': 5536, 'epoch': 1} {'type': 'loss', 'content': 0.12560778856277466, 'timestamp': '2025-10-01 04:19:32.260463', 'step': 5537, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:32.294170', 'step': 5537, 'epoch': 1} {'type': 'loss', 'content': 0.189213365316391, 'timestamp': '2025-10-01 04:19:32.297164', 'step': 5538, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:19:32.335238', 'step': 5538, 'epoch': 1} {'type': 'loss', 'content': 0.1871763914823532, 'timestamp': '2025-10-01 04:19:32.338069', 'step': 5539, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:19:32.373338', 'step': 5539, 'epoch': 1} {'type': 'loss', 'content': 0.15284673869609833, 'timestamp': '2025-10-01 04:19:32.397143', 'step': 5540, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:19:32.428335', 'step': 5540, 'epoch': 1} {'type': 'loss', 'content': 0.10504818707704544, 'timestamp': '2025-10-01 04:19:32.430255', 'step': 5541, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:32.467130', 'step': 5541, 'epoch': 1} {'type': 'loss', 'content': 0.1410733163356781, 'timestamp': '2025-10-01 04:19:32.469111', 'step': 5542, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:32.510717', 'step': 5542, 'epoch': 1} {'type': 'loss', 'content': 0.1158980280160904, 'timestamp': '2025-10-01 04:19:32.512807', 'step': 5543, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:32.553584', 'step': 5543, 'epoch': 1} {'type': 'loss', 'content': 0.1747506707906723, 'timestamp': '2025-10-01 04:19:32.577037', 'step': 5544, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:32.610586', 'step': 5544, 'epoch': 1} {'type': 'loss', 'content': 0.10526887327432632, 'timestamp': '2025-10-01 04:19:32.612592', 'step': 5545, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:32.644430', 'step': 5545, 'epoch': 1} {'type': 'loss', 'content': 0.23229707777500153, 'timestamp': '2025-10-01 04:19:32.646342', 'step': 5546, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:32.687321', 'step': 5546, 'epoch': 1} {'type': 'loss', 'content': 0.14056849479675293, 'timestamp': '2025-10-01 04:19:32.691667', 'step': 5547, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:32.726796', 'step': 5547, 'epoch': 1} {'type': 'loss', 'content': 0.19907402992248535, 'timestamp': '2025-10-01 04:19:32.750346', 'step': 5548, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:32.781622', 'step': 5548, 'epoch': 1} {'type': 'loss', 'content': 0.16972452402114868, 'timestamp': '2025-10-01 04:19:32.784130', 'step': 5549, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:32.821014', 'step': 5549, 'epoch': 1} {'type': 'loss', 'content': 0.16157856583595276, 'timestamp': '2025-10-01 04:19:32.823128', 'step': 5550, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:32.861584', 'step': 5550, 'epoch': 1} {'type': 'loss', 'content': 0.2905413508415222, 'timestamp': '2025-10-01 04:19:32.863525', 'step': 5551, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:32.897225', 'step': 5551, 'epoch': 1} {'type': 'loss', 'content': 0.22863589227199554, 'timestamp': '2025-10-01 04:19:32.921548', 'step': 5552, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:32.959243', 'step': 5552, 'epoch': 1} {'type': 'loss', 'content': 0.2588467001914978, 'timestamp': '2025-10-01 04:19:32.961397', 'step': 5553, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:32.994554', 'step': 5553, 'epoch': 1} {'type': 'loss', 'content': 0.17762795090675354, 'timestamp': '2025-10-01 04:19:32.997018', 'step': 5554, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:33.030322', 'step': 5554, 'epoch': 1} {'type': 'loss', 'content': 0.11845552921295166, 'timestamp': '2025-10-01 04:19:33.032829', 'step': 5555, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:33.062533', 'step': 5555, 'epoch': 1} {'type': 'loss', 'content': 0.13567934930324554, 'timestamp': '2025-10-01 04:19:33.092741', 'step': 5556, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:33.130054', 'step': 5556, 'epoch': 1} {'type': 'loss', 'content': 0.20724833011627197, 'timestamp': '2025-10-01 04:19:33.131982', 'step': 5557, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:19:33.166033', 'step': 5557, 'epoch': 1} {'type': 'loss', 'content': 0.17670586705207825, 'timestamp': '2025-10-01 04:19:33.168524', 'step': 5558, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:33.199462', 'step': 5558, 'epoch': 1} {'type': 'loss', 'content': 0.137455552816391, 'timestamp': '2025-10-01 04:19:33.213726', 'step': 5559, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:33.248888', 'step': 5559, 'epoch': 1} {'type': 'loss', 'content': 0.07756063342094421, 'timestamp': '2025-10-01 04:19:33.272374', 'step': 5560, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:33.306789', 'step': 5560, 'epoch': 1} {'type': 'loss', 'content': 0.10250744968652725, 'timestamp': '2025-10-01 04:19:33.309791', 'step': 5561, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:33.340334', 'step': 5561, 'epoch': 1} {'type': 'loss', 'content': 0.11860011518001556, 'timestamp': '2025-10-01 04:19:33.342447', 'step': 5562, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:19:33.376744', 'step': 5562, 'epoch': 1} {'type': 'loss', 'content': 0.23934698104858398, 'timestamp': '2025-10-01 04:19:33.381014', 'step': 5563, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-10-01 04:19:33.411856', 'step': 5563, 'epoch': 1} {'type': 'loss', 'content': 0.13285787403583527, 'timestamp': '2025-10-01 04:19:33.437530', 'step': 5564, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:19:33.467750', 'step': 5564, 'epoch': 1} {'type': 'loss', 'content': 0.1564362347126007, 'timestamp': '2025-10-01 04:19:33.469890', 'step': 5565, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:33.500321', 'step': 5565, 'epoch': 1} {'type': 'loss', 'content': 0.2598883807659149, 'timestamp': '2025-10-01 04:19:33.502378', 'step': 5566, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:33.532767', 'step': 5566, 'epoch': 1} {'type': 'loss', 'content': 0.14347368478775024, 'timestamp': '2025-10-01 04:19:33.534738', 'step': 5567, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:19:33.566262', 'step': 5567, 'epoch': 1} {'type': 'loss', 'content': 0.16893145442008972, 'timestamp': '2025-10-01 04:19:33.589905', 'step': 5568, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:33.621786', 'step': 5568, 'epoch': 1} {'type': 'loss', 'content': 0.24242693185806274, 'timestamp': '2025-10-01 04:19:33.623982', 'step': 5569, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:33.656026', 'step': 5569, 'epoch': 1} {'type': 'loss', 'content': 0.14346615970134735, 'timestamp': '2025-10-01 04:19:33.658059', 'step': 5570, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:33.694012', 'step': 5570, 'epoch': 1} {'type': 'loss', 'content': 0.18162024021148682, 'timestamp': '2025-10-01 04:19:33.696066', 'step': 5571, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:19:33.731326', 'step': 5571, 'epoch': 1} {'type': 'loss', 'content': 0.10521228611469269, 'timestamp': '2025-10-01 04:19:33.760552', 'step': 5572, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:33.790554', 'step': 5572, 'epoch': 1} {'type': 'loss', 'content': 0.08841690421104431, 'timestamp': '2025-10-01 04:19:33.792577', 'step': 5573, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:33.823015', 'step': 5573, 'epoch': 1} {'type': 'loss', 'content': 0.14235785603523254, 'timestamp': '2025-10-01 04:19:33.825177', 'step': 5574, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:19:33.856191', 'step': 5574, 'epoch': 1} {'type': 'loss', 'content': 0.11179076135158539, 'timestamp': '2025-10-01 04:19:33.860520', 'step': 5575, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:33.894630', 'step': 5575, 'epoch': 1} {'type': 'loss', 'content': 0.11072216928005219, 'timestamp': '2025-10-01 04:19:33.918315', 'step': 5576, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:33.962877', 'step': 5576, 'epoch': 1} {'type': 'loss', 'content': 0.14155028760433197, 'timestamp': '2025-10-01 04:19:33.965099', 'step': 5577, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:33.997737', 'step': 5577, 'epoch': 1} {'type': 'loss', 'content': 0.12244565039873123, 'timestamp': '2025-10-01 04:19:33.999877', 'step': 5578, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:34.030417', 'step': 5578, 'epoch': 1} {'type': 'loss', 'content': 0.12087692320346832, 'timestamp': '2025-10-01 04:19:34.032510', 'step': 5579, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:19:34.062141', 'step': 5579, 'epoch': 1} {'type': 'loss', 'content': 0.16367733478546143, 'timestamp': '2025-10-01 04:19:34.086659', 'step': 5580, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:34.126343', 'step': 5580, 'epoch': 1} {'type': 'loss', 'content': 0.09556473046541214, 'timestamp': '2025-10-01 04:19:34.128348', 'step': 5581, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:34.161789', 'step': 5581, 'epoch': 1} {'type': 'loss', 'content': 0.12666165828704834, 'timestamp': '2025-10-01 04:19:34.163857', 'step': 5582, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:34.204284', 'step': 5582, 'epoch': 1} {'type': 'loss', 'content': 0.12273600697517395, 'timestamp': '2025-10-01 04:19:34.206585', 'step': 5583, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:34.238078', 'step': 5583, 'epoch': 1} {'type': 'loss', 'content': 0.16297385096549988, 'timestamp': '2025-10-01 04:19:34.262793', 'step': 5584, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:34.293158', 'step': 5584, 'epoch': 1} {'type': 'loss', 'content': 0.26761066913604736, 'timestamp': '2025-10-01 04:19:34.295430', 'step': 5585, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:34.328442', 'step': 5585, 'epoch': 1} {'type': 'loss', 'content': 0.06938012689352036, 'timestamp': '2025-10-01 04:19:34.330439', 'step': 5586, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:19:34.369899', 'step': 5586, 'epoch': 1} {'type': 'loss', 'content': 0.09225903451442719, 'timestamp': '2025-10-01 04:19:34.371957', 'step': 5587, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:34.401763', 'step': 5587, 'epoch': 1} {'type': 'loss', 'content': 0.12257437407970428, 'timestamp': '2025-10-01 04:19:34.427869', 'step': 5588, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:34.461844', 'step': 5588, 'epoch': 1} {'type': 'loss', 'content': 0.1323462575674057, 'timestamp': '2025-10-01 04:19:34.464110', 'step': 5589, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:34.497673', 'step': 5589, 'epoch': 1} {'type': 'loss', 'content': 0.09719909727573395, 'timestamp': '2025-10-01 04:19:34.499747', 'step': 5590, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:34.529845', 'step': 5590, 'epoch': 1} {'type': 'loss', 'content': 0.19779729843139648, 'timestamp': '2025-10-01 04:19:34.531893', 'step': 5591, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:19:34.562364', 'step': 5591, 'epoch': 1} {'type': 'loss', 'content': 0.07415604591369629, 'timestamp': '2025-10-01 04:19:34.585975', 'step': 5592, 'epoch': 1} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 949202279808}], 'timestamp': '2025-10-01 04:19:43.908729', 'step': 5592, 'epoch': 1} {'type': 'pplx', 'content': 7879.385081975291, 'timestamp': '2025-10-01 04:19:43.911431', 'step': 5592, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:43.942507', 'step': 5592, 'epoch': 1} {'type': 'loss', 'content': 0.14001883566379547, 'timestamp': '2025-10-01 04:19:43.944760', 'step': 5593, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:43.978419', 'step': 5593, 'epoch': 1} {'type': 'loss', 'content': 0.12331104278564453, 'timestamp': '2025-10-01 04:19:43.980208', 'step': 5594, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:44.010948', 'step': 5594, 'epoch': 1} {'type': 'loss', 'content': 0.17685601115226746, 'timestamp': '2025-10-01 04:19:44.012850', 'step': 5595, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:19:44.043437', 'step': 5595, 'epoch': 1} {'type': 'loss', 'content': 0.17535890638828278, 'timestamp': '2025-10-01 04:19:44.067137', 'step': 5596, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:44.097684', 'step': 5596, 'epoch': 1} {'type': 'loss', 'content': 0.302242636680603, 'timestamp': '2025-10-01 04:19:44.099684', 'step': 5597, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:44.131453', 'step': 5597, 'epoch': 1} {'type': 'loss', 'content': 0.1130334734916687, 'timestamp': '2025-10-01 04:19:44.133669', 'step': 5598, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:44.165272', 'step': 5598, 'epoch': 1} {'type': 'loss', 'content': 0.14879751205444336, 'timestamp': '2025-10-01 04:19:44.167162', 'step': 5599, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:44.198693', 'step': 5599, 'epoch': 1} {'type': 'loss', 'content': 0.12720438838005066, 'timestamp': '2025-10-01 04:19:44.222421', 'step': 5600, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:44.253462', 'step': 5600, 'epoch': 1} {'type': 'loss', 'content': 0.1785038411617279, 'timestamp': '2025-10-01 04:19:44.255422', 'step': 5601, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:44.285858', 'step': 5601, 'epoch': 1} {'type': 'loss', 'content': 0.15236809849739075, 'timestamp': '2025-10-01 04:19:44.287839', 'step': 5602, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:44.317616', 'step': 5602, 'epoch': 1} {'type': 'loss', 'content': 0.20339632034301758, 'timestamp': '2025-10-01 04:19:44.319645', 'step': 5603, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:44.358443', 'step': 5603, 'epoch': 1} {'type': 'loss', 'content': 0.13610553741455078, 'timestamp': '2025-10-01 04:19:44.381982', 'step': 5604, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:44.412506', 'step': 5604, 'epoch': 1} {'type': 'loss', 'content': 0.14780017733573914, 'timestamp': '2025-10-01 04:19:44.415461', 'step': 5605, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:44.446368', 'step': 5605, 'epoch': 1} {'type': 'loss', 'content': 0.06692610681056976, 'timestamp': '2025-10-01 04:19:44.448569', 'step': 5606, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:19:44.480207', 'step': 5606, 'epoch': 1} {'type': 'loss', 'content': 0.1616295725107193, 'timestamp': '2025-10-01 04:19:44.483511', 'step': 5607, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:44.513883', 'step': 5607, 'epoch': 1} {'type': 'loss', 'content': 0.21962247788906097, 'timestamp': '2025-10-01 04:19:44.538545', 'step': 5608, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:44.570270', 'step': 5608, 'epoch': 1} {'type': 'loss', 'content': 0.1826576292514801, 'timestamp': '2025-10-01 04:19:44.577318', 'step': 5609, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:19:44.609036', 'step': 5609, 'epoch': 1} {'type': 'loss', 'content': 0.11440828442573547, 'timestamp': '2025-10-01 04:19:44.611480', 'step': 5610, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:44.641522', 'step': 5610, 'epoch': 1} {'type': 'loss', 'content': 0.0827353224158287, 'timestamp': '2025-10-01 04:19:44.643465', 'step': 5611, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:19:44.673850', 'step': 5611, 'epoch': 1} {'type': 'loss', 'content': 0.12185850739479065, 'timestamp': '2025-10-01 04:19:44.697227', 'step': 5612, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:44.728044', 'step': 5612, 'epoch': 1} {'type': 'loss', 'content': 0.16399529576301575, 'timestamp': '2025-10-01 04:19:44.730256', 'step': 5613, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:44.760561', 'step': 5613, 'epoch': 1} {'type': 'loss', 'content': 0.17722827196121216, 'timestamp': '2025-10-01 04:19:44.762631', 'step': 5614, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:44.794436', 'step': 5614, 'epoch': 1} {'type': 'loss', 'content': 0.13455446064472198, 'timestamp': '2025-10-01 04:19:44.796500', 'step': 5615, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:44.827694', 'step': 5615, 'epoch': 1} {'type': 'loss', 'content': 0.12118232250213623, 'timestamp': '2025-10-01 04:19:44.851238', 'step': 5616, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:44.881536', 'step': 5616, 'epoch': 1} {'type': 'loss', 'content': 0.12542042136192322, 'timestamp': '2025-10-01 04:19:44.884385', 'step': 5617, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:44.915802', 'step': 5617, 'epoch': 1} {'type': 'loss', 'content': 0.17930865287780762, 'timestamp': '2025-10-01 04:19:44.917917', 'step': 5618, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:19:44.948984', 'step': 5618, 'epoch': 1} {'type': 'loss', 'content': 0.10645724833011627, 'timestamp': '2025-10-01 04:19:44.951072', 'step': 5619, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:44.982835', 'step': 5619, 'epoch': 1} {'type': 'loss', 'content': 0.16078530251979828, 'timestamp': '2025-10-01 04:19:45.007226', 'step': 5620, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:45.042570', 'step': 5620, 'epoch': 1} {'type': 'loss', 'content': 0.273356556892395, 'timestamp': '2025-10-01 04:19:45.044941', 'step': 5621, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:19:45.075234', 'step': 5621, 'epoch': 1} {'type': 'loss', 'content': 0.1619429886341095, 'timestamp': '2025-10-01 04:19:45.078085', 'step': 5622, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:45.108601', 'step': 5622, 'epoch': 1} {'type': 'loss', 'content': 0.13883450627326965, 'timestamp': '2025-10-01 04:19:45.110643', 'step': 5623, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:45.140365', 'step': 5623, 'epoch': 1} {'type': 'loss', 'content': 0.17252279818058014, 'timestamp': '2025-10-01 04:19:45.164216', 'step': 5624, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:45.194346', 'step': 5624, 'epoch': 1} {'type': 'loss', 'content': 0.2052391618490219, 'timestamp': '2025-10-01 04:19:45.197740', 'step': 5625, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:45.228036', 'step': 5625, 'epoch': 1} {'type': 'loss', 'content': 0.20071662962436676, 'timestamp': '2025-10-01 04:19:45.231163', 'step': 5626, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:45.262201', 'step': 5626, 'epoch': 1} {'type': 'loss', 'content': 0.3204132914543152, 'timestamp': '2025-10-01 04:19:45.264591', 'step': 5627, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:45.295958', 'step': 5627, 'epoch': 1} {'type': 'loss', 'content': 0.1576220989227295, 'timestamp': '2025-10-01 04:19:45.320200', 'step': 5628, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:45.351732', 'step': 5628, 'epoch': 1} {'type': 'loss', 'content': 0.1914144605398178, 'timestamp': '2025-10-01 04:19:45.353655', 'step': 5629, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:45.385286', 'step': 5629, 'epoch': 1} {'type': 'loss', 'content': 0.2196216881275177, 'timestamp': '2025-10-01 04:19:45.387572', 'step': 5630, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:45.423958', 'step': 5630, 'epoch': 1} {'type': 'loss', 'content': 0.17090071737766266, 'timestamp': '2025-10-01 04:19:45.427199', 'step': 5631, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:45.457891', 'step': 5631, 'epoch': 1} {'type': 'loss', 'content': 0.17166170477867126, 'timestamp': '2025-10-01 04:19:45.482662', 'step': 5632, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:45.512449', 'step': 5632, 'epoch': 1} {'type': 'loss', 'content': 0.16655173897743225, 'timestamp': '2025-10-01 04:19:45.523653', 'step': 5633, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:45.554878', 'step': 5633, 'epoch': 1} {'type': 'loss', 'content': 0.14729084074497223, 'timestamp': '2025-10-01 04:19:45.556999', 'step': 5634, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:45.588020', 'step': 5634, 'epoch': 1} {'type': 'loss', 'content': 0.19960738718509674, 'timestamp': '2025-10-01 04:19:45.590184', 'step': 5635, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:45.625364', 'step': 5635, 'epoch': 1} {'type': 'loss', 'content': 0.1966332644224167, 'timestamp': '2025-10-01 04:19:45.649005', 'step': 5636, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:45.680376', 'step': 5636, 'epoch': 1} {'type': 'loss', 'content': 0.16038158535957336, 'timestamp': '2025-10-01 04:19:45.682994', 'step': 5637, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:19:45.714188', 'step': 5637, 'epoch': 1} {'type': 'loss', 'content': 0.1521347612142563, 'timestamp': '2025-10-01 04:19:45.718640', 'step': 5638, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:45.749676', 'step': 5638, 'epoch': 1} {'type': 'loss', 'content': 0.18726034462451935, 'timestamp': '2025-10-01 04:19:45.760058', 'step': 5639, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:19:45.791523', 'step': 5639, 'epoch': 1} {'type': 'loss', 'content': 0.1722412258386612, 'timestamp': '2025-10-01 04:19:45.815010', 'step': 5640, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:45.845594', 'step': 5640, 'epoch': 1} {'type': 'loss', 'content': 0.14079555869102478, 'timestamp': '2025-10-01 04:19:45.847659', 'step': 5641, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:19:45.877994', 'step': 5641, 'epoch': 1} {'type': 'loss', 'content': 0.24551929533481598, 'timestamp': '2025-10-01 04:19:45.886689', 'step': 5642, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:19:45.917668', 'step': 5642, 'epoch': 1} {'type': 'loss', 'content': 0.25403398275375366, 'timestamp': '2025-10-01 04:19:45.919665', 'step': 5643, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:45.950575', 'step': 5643, 'epoch': 1} {'type': 'loss', 'content': 0.16393208503723145, 'timestamp': '2025-10-01 04:19:45.974179', 'step': 5644, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:46.007977', 'step': 5644, 'epoch': 1} {'type': 'loss', 'content': 0.15543708205223083, 'timestamp': '2025-10-01 04:19:46.016817', 'step': 5645, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:46.048924', 'step': 5645, 'epoch': 1} {'type': 'loss', 'content': 0.17691533267498016, 'timestamp': '2025-10-01 04:19:46.051007', 'step': 5646, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:46.082274', 'step': 5646, 'epoch': 1} {'type': 'loss', 'content': 0.08249307423830032, 'timestamp': '2025-10-01 04:19:46.084547', 'step': 5647, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:19:46.115919', 'step': 5647, 'epoch': 1} {'type': 'loss', 'content': 0.19801612198352814, 'timestamp': '2025-10-01 04:19:46.141302', 'step': 5648, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:46.171691', 'step': 5648, 'epoch': 1} {'type': 'loss', 'content': 0.15885771811008453, 'timestamp': '2025-10-01 04:19:46.174022', 'step': 5649, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:46.208061', 'step': 5649, 'epoch': 1} {'type': 'loss', 'content': 0.15663935244083405, 'timestamp': '2025-10-01 04:19:46.210215', 'step': 5650, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:46.243017', 'step': 5650, 'epoch': 1} {'type': 'loss', 'content': 0.12434446066617966, 'timestamp': '2025-10-01 04:19:46.245175', 'step': 5651, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:46.274483', 'step': 5651, 'epoch': 1} {'type': 'loss', 'content': 0.04798900708556175, 'timestamp': '2025-10-01 04:19:46.298057', 'step': 5652, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:46.329138', 'step': 5652, 'epoch': 1} {'type': 'loss', 'content': 0.1132817342877388, 'timestamp': '2025-10-01 04:19:46.331099', 'step': 5653, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:19:46.370066', 'step': 5653, 'epoch': 1} {'type': 'loss', 'content': 0.19906824827194214, 'timestamp': '2025-10-01 04:19:46.372610', 'step': 5654, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:46.403484', 'step': 5654, 'epoch': 1} {'type': 'loss', 'content': 0.15452110767364502, 'timestamp': '2025-10-01 04:19:46.405575', 'step': 5655, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:46.437121', 'step': 5655, 'epoch': 1} {'type': 'loss', 'content': 0.17516447603702545, 'timestamp': '2025-10-01 04:19:46.461403', 'step': 5656, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:46.492393', 'step': 5656, 'epoch': 1} {'type': 'loss', 'content': 0.14074374735355377, 'timestamp': '2025-10-01 04:19:46.494405', 'step': 5657, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:46.526672', 'step': 5657, 'epoch': 1} {'type': 'loss', 'content': 0.2484458088874817, 'timestamp': '2025-10-01 04:19:46.528868', 'step': 5658, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:46.559345', 'step': 5658, 'epoch': 1} {'type': 'loss', 'content': 0.16825643181800842, 'timestamp': '2025-10-01 04:19:46.561561', 'step': 5659, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:46.591957', 'step': 5659, 'epoch': 1} {'type': 'loss', 'content': 0.0945902168750763, 'timestamp': '2025-10-01 04:19:46.615750', 'step': 5660, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:46.657673', 'step': 5660, 'epoch': 1} {'type': 'loss', 'content': 0.17284391820430756, 'timestamp': '2025-10-01 04:19:46.659770', 'step': 5661, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:46.689863', 'step': 5661, 'epoch': 1} {'type': 'loss', 'content': 0.16668853163719177, 'timestamp': '2025-10-01 04:19:46.692247', 'step': 5662, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:46.723274', 'step': 5662, 'epoch': 1} {'type': 'loss', 'content': 0.10480152815580368, 'timestamp': '2025-10-01 04:19:46.725476', 'step': 5663, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:46.756480', 'step': 5663, 'epoch': 1} {'type': 'loss', 'content': 0.1336534172296524, 'timestamp': '2025-10-01 04:19:46.780181', 'step': 5664, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:46.811914', 'step': 5664, 'epoch': 1} {'type': 'loss', 'content': 0.10380137711763382, 'timestamp': '2025-10-01 04:19:46.814277', 'step': 5665, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:19:46.846571', 'step': 5665, 'epoch': 1} {'type': 'loss', 'content': 0.074150450527668, 'timestamp': '2025-10-01 04:19:46.848773', 'step': 5666, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:46.879124', 'step': 5666, 'epoch': 1} {'type': 'loss', 'content': 0.10360030829906464, 'timestamp': '2025-10-01 04:19:46.881228', 'step': 5667, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:46.911940', 'step': 5667, 'epoch': 1} {'type': 'loss', 'content': 0.13581404089927673, 'timestamp': '2025-10-01 04:19:46.935565', 'step': 5668, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:19:46.965902', 'step': 5668, 'epoch': 1} {'type': 'loss', 'content': 0.13294291496276855, 'timestamp': '2025-10-01 04:19:46.976222', 'step': 5669, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:47.007534', 'step': 5669, 'epoch': 1} {'type': 'loss', 'content': 0.07382941246032715, 'timestamp': '2025-10-01 04:19:47.009854', 'step': 5670, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:47.041415', 'step': 5670, 'epoch': 1} {'type': 'loss', 'content': 0.14695416390895844, 'timestamp': '2025-10-01 04:19:47.044179', 'step': 5671, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:47.076004', 'step': 5671, 'epoch': 1} {'type': 'loss', 'content': 0.15056034922599792, 'timestamp': '2025-10-01 04:19:47.099602', 'step': 5672, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:19:47.130195', 'step': 5672, 'epoch': 1} {'type': 'loss', 'content': 0.24310201406478882, 'timestamp': '2025-10-01 04:19:47.132303', 'step': 5673, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:47.163414', 'step': 5673, 'epoch': 1} {'type': 'loss', 'content': 0.12800979614257812, 'timestamp': '2025-10-01 04:19:47.166087', 'step': 5674, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:47.196231', 'step': 5674, 'epoch': 1} {'type': 'loss', 'content': 0.17096924781799316, 'timestamp': '2025-10-01 04:19:47.203416', 'step': 5675, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:47.241576', 'step': 5675, 'epoch': 1} {'type': 'loss', 'content': 0.17462648451328278, 'timestamp': '2025-10-01 04:19:47.265383', 'step': 5676, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:47.295837', 'step': 5676, 'epoch': 1} {'type': 'loss', 'content': 0.13707193732261658, 'timestamp': '2025-10-01 04:19:47.297806', 'step': 5677, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:47.328184', 'step': 5677, 'epoch': 1} {'type': 'loss', 'content': 0.11368541419506073, 'timestamp': '2025-10-01 04:19:47.330347', 'step': 5678, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:47.363016', 'step': 5678, 'epoch': 1} {'type': 'loss', 'content': 0.0796641930937767, 'timestamp': '2025-10-01 04:19:47.365003', 'step': 5679, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:47.395791', 'step': 5679, 'epoch': 1} {'type': 'loss', 'content': 0.14399155974388123, 'timestamp': '2025-10-01 04:19:47.419748', 'step': 5680, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:47.460093', 'step': 5680, 'epoch': 1} {'type': 'loss', 'content': 0.17390292882919312, 'timestamp': '2025-10-01 04:19:47.462399', 'step': 5681, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:47.494118', 'step': 5681, 'epoch': 1} {'type': 'loss', 'content': 0.15862849354743958, 'timestamp': '2025-10-01 04:19:47.496377', 'step': 5682, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:19:47.528317', 'step': 5682, 'epoch': 1} {'type': 'loss', 'content': 0.15609851479530334, 'timestamp': '2025-10-01 04:19:47.530535', 'step': 5683, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:47.562135', 'step': 5683, 'epoch': 1} {'type': 'loss', 'content': 0.24887830018997192, 'timestamp': '2025-10-01 04:19:47.585852', 'step': 5684, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:47.618657', 'step': 5684, 'epoch': 1} {'type': 'loss', 'content': 0.15202228724956512, 'timestamp': '2025-10-01 04:19:47.620917', 'step': 5685, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:47.651841', 'step': 5685, 'epoch': 1} {'type': 'loss', 'content': 0.18304438889026642, 'timestamp': '2025-10-01 04:19:47.654527', 'step': 5686, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:47.686674', 'step': 5686, 'epoch': 1} {'type': 'loss', 'content': 0.1384705901145935, 'timestamp': '2025-10-01 04:19:47.689192', 'step': 5687, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:47.720823', 'step': 5687, 'epoch': 1} {'type': 'loss', 'content': 0.20621879398822784, 'timestamp': '2025-10-01 04:19:47.744890', 'step': 5688, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:19:47.777218', 'step': 5688, 'epoch': 1} {'type': 'loss', 'content': 0.16491509974002838, 'timestamp': '2025-10-01 04:19:47.779578', 'step': 5689, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:47.824895', 'step': 5689, 'epoch': 1} {'type': 'loss', 'content': 0.2090306431055069, 'timestamp': '2025-10-01 04:19:47.826871', 'step': 5690, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:19:47.879046', 'step': 5690, 'epoch': 1} {'type': 'loss', 'content': 0.2262493371963501, 'timestamp': '2025-10-01 04:19:47.881506', 'step': 5691, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:19:47.925144', 'step': 5691, 'epoch': 1} {'type': 'loss', 'content': 0.1438775658607483, 'timestamp': '2025-10-01 04:19:47.948809', 'step': 5692, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:19:48.000044', 'step': 5692, 'epoch': 1} {'type': 'loss', 'content': 0.11210677772760391, 'timestamp': '2025-10-01 04:19:48.002028', 'step': 5693, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:48.068790', 'step': 5693, 'epoch': 1} {'type': 'loss', 'content': 0.26251736283302307, 'timestamp': '2025-10-01 04:19:48.071183', 'step': 5694, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:19:48.108650', 'step': 5694, 'epoch': 1} {'type': 'loss', 'content': 0.13464218378067017, 'timestamp': '2025-10-01 04:19:48.111118', 'step': 5695, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:48.149690', 'step': 5695, 'epoch': 1} {'type': 'loss', 'content': 0.25397613644599915, 'timestamp': '2025-10-01 04:19:48.173218', 'step': 5696, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:48.207468', 'step': 5696, 'epoch': 1} {'type': 'loss', 'content': 0.12091924995183945, 'timestamp': '2025-10-01 04:19:48.209528', 'step': 5697, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:48.257514', 'step': 5697, 'epoch': 1} {'type': 'loss', 'content': 0.18267951905727386, 'timestamp': '2025-10-01 04:19:48.261248', 'step': 5698, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:48.305100', 'step': 5698, 'epoch': 1} {'type': 'loss', 'content': 0.12823858857154846, 'timestamp': '2025-10-01 04:19:48.307140', 'step': 5699, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:48.350796', 'step': 5699, 'epoch': 1} {'type': 'loss', 'content': 0.08830200135707855, 'timestamp': '2025-10-01 04:19:48.374325', 'step': 5700, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:48.406156', 'step': 5700, 'epoch': 1} {'type': 'loss', 'content': 0.19581228494644165, 'timestamp': '2025-10-01 04:19:48.408190', 'step': 5701, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:48.451796', 'step': 5701, 'epoch': 1} {'type': 'loss', 'content': 0.13104605674743652, 'timestamp': '2025-10-01 04:19:48.453857', 'step': 5702, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:48.484938', 'step': 5702, 'epoch': 1} {'type': 'loss', 'content': 0.18143324553966522, 'timestamp': '2025-10-01 04:19:48.486880', 'step': 5703, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:48.540568', 'step': 5703, 'epoch': 1} {'type': 'loss', 'content': 0.11851201206445694, 'timestamp': '2025-10-01 04:19:48.564238', 'step': 5704, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:48.614186', 'step': 5704, 'epoch': 1} {'type': 'loss', 'content': 0.16053402423858643, 'timestamp': '2025-10-01 04:19:48.616188', 'step': 5705, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:48.651397', 'step': 5705, 'epoch': 1} {'type': 'loss', 'content': 0.14659401774406433, 'timestamp': '2025-10-01 04:19:48.654451', 'step': 5706, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:19:48.693469', 'step': 5706, 'epoch': 1} {'type': 'loss', 'content': 0.13609199225902557, 'timestamp': '2025-10-01 04:19:48.696348', 'step': 5707, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:19:48.739277', 'step': 5707, 'epoch': 1} {'type': 'loss', 'content': 0.14510434865951538, 'timestamp': '2025-10-01 04:19:48.768620', 'step': 5708, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:48.828675', 'step': 5708, 'epoch': 1} {'type': 'loss', 'content': 0.22568970918655396, 'timestamp': '2025-10-01 04:19:48.830937', 'step': 5709, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:48.871567', 'step': 5709, 'epoch': 1} {'type': 'loss', 'content': 0.21735495328903198, 'timestamp': '2025-10-01 04:19:48.873540', 'step': 5710, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:48.910534', 'step': 5710, 'epoch': 1} {'type': 'loss', 'content': 0.15619561076164246, 'timestamp': '2025-10-01 04:19:48.914861', 'step': 5711, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:48.946796', 'step': 5711, 'epoch': 1} {'type': 'loss', 'content': 0.07414550334215164, 'timestamp': '2025-10-01 04:19:48.970377', 'step': 5712, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:49.004790', 'step': 5712, 'epoch': 1} {'type': 'loss', 'content': 0.1301034688949585, 'timestamp': '2025-10-01 04:19:49.007637', 'step': 5713, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:49.040611', 'step': 5713, 'epoch': 1} {'type': 'loss', 'content': 0.18111921846866608, 'timestamp': '2025-10-01 04:19:49.048667', 'step': 5714, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:19:49.081000', 'step': 5714, 'epoch': 1} {'type': 'loss', 'content': 0.147537499666214, 'timestamp': '2025-10-01 04:19:49.083583', 'step': 5715, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:19:49.114031', 'step': 5715, 'epoch': 1} {'type': 'loss', 'content': 0.11035618931055069, 'timestamp': '2025-10-01 04:19:49.139470', 'step': 5716, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:49.169897', 'step': 5716, 'epoch': 1} {'type': 'loss', 'content': 0.20755095779895782, 'timestamp': '2025-10-01 04:19:49.171998', 'step': 5717, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:19:49.202261', 'step': 5717, 'epoch': 1} {'type': 'loss', 'content': 0.11588768661022186, 'timestamp': '2025-10-01 04:19:49.204720', 'step': 5718, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:49.237464', 'step': 5718, 'epoch': 1} {'type': 'loss', 'content': 0.10672339051961899, 'timestamp': '2025-10-01 04:19:49.239535', 'step': 5719, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:49.271069', 'step': 5719, 'epoch': 1} {'type': 'loss', 'content': 0.2520441710948944, 'timestamp': '2025-10-01 04:19:49.294552', 'step': 5720, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:19:49.326072', 'step': 5720, 'epoch': 1} {'type': 'loss', 'content': 0.1336706578731537, 'timestamp': '2025-10-01 04:19:49.328114', 'step': 5721, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:49.359842', 'step': 5721, 'epoch': 1} {'type': 'loss', 'content': 0.14050619304180145, 'timestamp': '2025-10-01 04:19:49.361666', 'step': 5722, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:49.393241', 'step': 5722, 'epoch': 1} {'type': 'loss', 'content': 0.14549776911735535, 'timestamp': '2025-10-01 04:19:49.399810', 'step': 5723, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:49.432694', 'step': 5723, 'epoch': 1} {'type': 'loss', 'content': 0.10138452798128128, 'timestamp': '2025-10-01 04:19:49.458797', 'step': 5724, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:49.494199', 'step': 5724, 'epoch': 1} {'type': 'loss', 'content': 0.12186720967292786, 'timestamp': '2025-10-01 04:19:49.496315', 'step': 5725, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:19:49.528337', 'step': 5725, 'epoch': 1} {'type': 'loss', 'content': 0.18511971831321716, 'timestamp': '2025-10-01 04:19:49.530793', 'step': 5726, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:19:49.565340', 'step': 5726, 'epoch': 1} {'type': 'loss', 'content': 0.10997336357831955, 'timestamp': '2025-10-01 04:19:49.567738', 'step': 5727, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:49.602387', 'step': 5727, 'epoch': 1} {'type': 'loss', 'content': 0.10087840259075165, 'timestamp': '2025-10-01 04:19:49.626036', 'step': 5728, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:49.660259', 'step': 5728, 'epoch': 1} {'type': 'loss', 'content': 0.14605844020843506, 'timestamp': '2025-10-01 04:19:49.662332', 'step': 5729, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:49.704060', 'step': 5729, 'epoch': 1} {'type': 'loss', 'content': 0.1410052329301834, 'timestamp': '2025-10-01 04:19:49.706011', 'step': 5730, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:19:49.739335', 'step': 5730, 'epoch': 1} {'type': 'loss', 'content': 0.08229807019233704, 'timestamp': '2025-10-01 04:19:49.741734', 'step': 5731, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:49.771843', 'step': 5731, 'epoch': 1} {'type': 'loss', 'content': 0.12150266021490097, 'timestamp': '2025-10-01 04:19:49.799878', 'step': 5732, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:49.831099', 'step': 5732, 'epoch': 1} {'type': 'loss', 'content': 0.16609416902065277, 'timestamp': '2025-10-01 04:19:49.833553', 'step': 5733, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:49.865963', 'step': 5733, 'epoch': 1} {'type': 'loss', 'content': 0.09900446981191635, 'timestamp': '2025-10-01 04:19:49.868250', 'step': 5734, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:49.899013', 'step': 5734, 'epoch': 1} {'type': 'loss', 'content': 0.1259097307920456, 'timestamp': '2025-10-01 04:19:49.901280', 'step': 5735, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:19:49.932615', 'step': 5735, 'epoch': 1} {'type': 'loss', 'content': 0.14685890078544617, 'timestamp': '2025-10-01 04:19:49.956260', 'step': 5736, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:19:49.996398', 'step': 5736, 'epoch': 1} {'type': 'loss', 'content': 0.17781656980514526, 'timestamp': '2025-10-01 04:19:49.998481', 'step': 5737, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:50.035914', 'step': 5737, 'epoch': 1} {'type': 'loss', 'content': 0.11244619637727737, 'timestamp': '2025-10-01 04:19:50.039931', 'step': 5738, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:50.070059', 'step': 5738, 'epoch': 1} {'type': 'loss', 'content': 0.12467122822999954, 'timestamp': '2025-10-01 04:19:50.072325', 'step': 5739, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:50.106235', 'step': 5739, 'epoch': 1} {'type': 'loss', 'content': 0.13821260631084442, 'timestamp': '2025-10-01 04:19:50.130186', 'step': 5740, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:50.176034', 'step': 5740, 'epoch': 1} {'type': 'loss', 'content': 0.10651645809412003, 'timestamp': '2025-10-01 04:19:50.179742', 'step': 5741, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:19:50.214759', 'step': 5741, 'epoch': 1} {'type': 'loss', 'content': 0.15608416497707367, 'timestamp': '2025-10-01 04:19:50.217839', 'step': 5742, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:50.250667', 'step': 5742, 'epoch': 1} {'type': 'loss', 'content': 0.11241453886032104, 'timestamp': '2025-10-01 04:19:50.253056', 'step': 5743, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:50.285522', 'step': 5743, 'epoch': 1} {'type': 'loss', 'content': 0.11940574645996094, 'timestamp': '2025-10-01 04:19:50.309057', 'step': 5744, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:50.346278', 'step': 5744, 'epoch': 1} {'type': 'loss', 'content': 0.10389407724142075, 'timestamp': '2025-10-01 04:19:50.348783', 'step': 5745, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:50.381177', 'step': 5745, 'epoch': 1} {'type': 'loss', 'content': 0.2192760407924652, 'timestamp': '2025-10-01 04:19:50.383176', 'step': 5746, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:50.413305', 'step': 5746, 'epoch': 1} {'type': 'loss', 'content': 0.15637148916721344, 'timestamp': '2025-10-01 04:19:50.415375', 'step': 5747, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:50.448140', 'step': 5747, 'epoch': 1} {'type': 'loss', 'content': 0.15516233444213867, 'timestamp': '2025-10-01 04:19:50.472082', 'step': 5748, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:50.505050', 'step': 5748, 'epoch': 1} {'type': 'loss', 'content': 0.21728107333183289, 'timestamp': '2025-10-01 04:19:50.519125', 'step': 5749, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:50.550358', 'step': 5749, 'epoch': 1} {'type': 'loss', 'content': 0.13947470486164093, 'timestamp': '2025-10-01 04:19:50.552301', 'step': 5750, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:50.582272', 'step': 5750, 'epoch': 1} {'type': 'loss', 'content': 0.15639251470565796, 'timestamp': '2025-10-01 04:19:50.584322', 'step': 5751, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:50.616063', 'step': 5751, 'epoch': 1} {'type': 'loss', 'content': 0.1105416789650917, 'timestamp': '2025-10-01 04:19:50.639900', 'step': 5752, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:19:50.669981', 'step': 5752, 'epoch': 1} {'type': 'loss', 'content': 0.07449714839458466, 'timestamp': '2025-10-01 04:19:50.672475', 'step': 5753, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:50.710006', 'step': 5753, 'epoch': 1} {'type': 'loss', 'content': 0.10144440829753876, 'timestamp': '2025-10-01 04:19:50.712111', 'step': 5754, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:50.743702', 'step': 5754, 'epoch': 1} {'type': 'loss', 'content': 0.12722815573215485, 'timestamp': '2025-10-01 04:19:50.745807', 'step': 5755, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:50.777124', 'step': 5755, 'epoch': 1} {'type': 'loss', 'content': 0.11302535980939865, 'timestamp': '2025-10-01 04:19:50.800674', 'step': 5756, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:50.831307', 'step': 5756, 'epoch': 1} {'type': 'loss', 'content': 0.153318390250206, 'timestamp': '2025-10-01 04:19:50.833320', 'step': 5757, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:19:50.864699', 'step': 5757, 'epoch': 1} {'type': 'loss', 'content': 0.17976690828800201, 'timestamp': '2025-10-01 04:19:50.867159', 'step': 5758, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:50.897523', 'step': 5758, 'epoch': 1} {'type': 'loss', 'content': 0.11335927248001099, 'timestamp': '2025-10-01 04:19:50.899532', 'step': 5759, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:50.933311', 'step': 5759, 'epoch': 1} {'type': 'loss', 'content': 0.10256807506084442, 'timestamp': '2025-10-01 04:19:50.956807', 'step': 5760, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:50.987435', 'step': 5760, 'epoch': 1} {'type': 'loss', 'content': 0.1472613513469696, 'timestamp': '2025-10-01 04:19:50.989466', 'step': 5761, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:51.020293', 'step': 5761, 'epoch': 1} {'type': 'loss', 'content': 0.15296076238155365, 'timestamp': '2025-10-01 04:19:51.022335', 'step': 5762, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:51.052801', 'step': 5762, 'epoch': 1} {'type': 'loss', 'content': 0.06564188748598099, 'timestamp': '2025-10-01 04:19:51.055073', 'step': 5763, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:51.085157', 'step': 5763, 'epoch': 1} {'type': 'loss', 'content': 0.1297888606786728, 'timestamp': '2025-10-01 04:19:51.108752', 'step': 5764, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:51.139967', 'step': 5764, 'epoch': 1} {'type': 'loss', 'content': 0.33527812361717224, 'timestamp': '2025-10-01 04:19:51.141859', 'step': 5765, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:19:51.176900', 'step': 5765, 'epoch': 1} {'type': 'loss', 'content': 0.17423386871814728, 'timestamp': '2025-10-01 04:19:51.187982', 'step': 5766, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:19:51.222485', 'step': 5766, 'epoch': 1} {'type': 'loss', 'content': 0.17721664905548096, 'timestamp': '2025-10-01 04:19:51.224961', 'step': 5767, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:51.255539', 'step': 5767, 'epoch': 1} {'type': 'loss', 'content': 0.17603375017642975, 'timestamp': '2025-10-01 04:19:51.279113', 'step': 5768, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:51.310090', 'step': 5768, 'epoch': 1} {'type': 'loss', 'content': 0.1287195384502411, 'timestamp': '2025-10-01 04:19:51.312018', 'step': 5769, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:19:51.346084', 'step': 5769, 'epoch': 1} {'type': 'loss', 'content': 0.16117750108242035, 'timestamp': '2025-10-01 04:19:51.348020', 'step': 5770, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:51.378948', 'step': 5770, 'epoch': 1} {'type': 'loss', 'content': 0.2357068806886673, 'timestamp': '2025-10-01 04:19:51.380855', 'step': 5771, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:51.411429', 'step': 5771, 'epoch': 1} {'type': 'loss', 'content': 0.3096812069416046, 'timestamp': '2025-10-01 04:19:51.434932', 'step': 5772, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:51.466309', 'step': 5772, 'epoch': 1} {'type': 'loss', 'content': 0.21536991000175476, 'timestamp': '2025-10-01 04:19:51.468770', 'step': 5773, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:51.500067', 'step': 5773, 'epoch': 1} {'type': 'loss', 'content': 0.17583049833774567, 'timestamp': '2025-10-01 04:19:51.502267', 'step': 5774, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:51.533663', 'step': 5774, 'epoch': 1} {'type': 'loss', 'content': 0.19677427411079407, 'timestamp': '2025-10-01 04:19:51.535590', 'step': 5775, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:51.566091', 'step': 5775, 'epoch': 1} {'type': 'loss', 'content': 0.13247089087963104, 'timestamp': '2025-10-01 04:19:51.589617', 'step': 5776, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:51.621349', 'step': 5776, 'epoch': 1} {'type': 'loss', 'content': 0.11150085926055908, 'timestamp': '2025-10-01 04:19:51.623329', 'step': 5777, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:51.655386', 'step': 5777, 'epoch': 1} {'type': 'loss', 'content': 0.13651925325393677, 'timestamp': '2025-10-01 04:19:51.657324', 'step': 5778, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:51.688575', 'step': 5778, 'epoch': 1} {'type': 'loss', 'content': 0.1898787021636963, 'timestamp': '2025-10-01 04:19:51.690416', 'step': 5779, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:19:51.722511', 'step': 5779, 'epoch': 1} {'type': 'loss', 'content': 0.1747579723596573, 'timestamp': '2025-10-01 04:19:51.746221', 'step': 5780, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:51.779145', 'step': 5780, 'epoch': 1} {'type': 'loss', 'content': 0.16385021805763245, 'timestamp': '2025-10-01 04:19:51.781314', 'step': 5781, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:51.811601', 'step': 5781, 'epoch': 1} {'type': 'loss', 'content': 0.08927588164806366, 'timestamp': '2025-10-01 04:19:51.813745', 'step': 5782, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:51.844992', 'step': 5782, 'epoch': 1} {'type': 'loss', 'content': 0.19450780749320984, 'timestamp': '2025-10-01 04:19:51.846865', 'step': 5783, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:51.881894', 'step': 5783, 'epoch': 1} {'type': 'loss', 'content': 0.1337115615606308, 'timestamp': '2025-10-01 04:19:51.906322', 'step': 5784, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:51.936110', 'step': 5784, 'epoch': 1} {'type': 'loss', 'content': 0.18424183130264282, 'timestamp': '2025-10-01 04:19:51.938200', 'step': 5785, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:51.967791', 'step': 5785, 'epoch': 1} {'type': 'loss', 'content': 0.18962182104587555, 'timestamp': '2025-10-01 04:19:51.970222', 'step': 5786, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:52.000854', 'step': 5786, 'epoch': 1} {'type': 'loss', 'content': 0.2176176905632019, 'timestamp': '2025-10-01 04:19:52.007670', 'step': 5787, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:52.039823', 'step': 5787, 'epoch': 1} {'type': 'loss', 'content': 0.07662804424762726, 'timestamp': '2025-10-01 04:19:52.063461', 'step': 5788, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:52.096517', 'step': 5788, 'epoch': 1} {'type': 'loss', 'content': 0.16570377349853516, 'timestamp': '2025-10-01 04:19:52.098459', 'step': 5789, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:52.128096', 'step': 5789, 'epoch': 1} {'type': 'loss', 'content': 0.19883310794830322, 'timestamp': '2025-10-01 04:19:52.130152', 'step': 5790, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:52.168556', 'step': 5790, 'epoch': 1} {'type': 'loss', 'content': 0.1572691947221756, 'timestamp': '2025-10-01 04:19:52.170482', 'step': 5791, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:52.201981', 'step': 5791, 'epoch': 1} {'type': 'loss', 'content': 0.14536088705062866, 'timestamp': '2025-10-01 04:19:52.225449', 'step': 5792, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:52.256861', 'step': 5792, 'epoch': 1} {'type': 'loss', 'content': 0.18964949250221252, 'timestamp': '2025-10-01 04:19:52.258938', 'step': 5793, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:52.289076', 'step': 5793, 'epoch': 1} {'type': 'loss', 'content': 0.17862997949123383, 'timestamp': '2025-10-01 04:19:52.291162', 'step': 5794, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:52.321716', 'step': 5794, 'epoch': 1} {'type': 'loss', 'content': 0.1772240400314331, 'timestamp': '2025-10-01 04:19:52.323974', 'step': 5795, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:52.353928', 'step': 5795, 'epoch': 1} {'type': 'loss', 'content': 0.22673337161540985, 'timestamp': '2025-10-01 04:19:52.377283', 'step': 5796, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:52.409240', 'step': 5796, 'epoch': 1} {'type': 'loss', 'content': 0.12096843123435974, 'timestamp': '2025-10-01 04:19:52.411692', 'step': 5797, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:52.442948', 'step': 5797, 'epoch': 1} {'type': 'loss', 'content': 0.07977575063705444, 'timestamp': '2025-10-01 04:19:52.445001', 'step': 5798, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:52.478993', 'step': 5798, 'epoch': 1} {'type': 'loss', 'content': 0.11253711581230164, 'timestamp': '2025-10-01 04:19:52.481020', 'step': 5799, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:19:52.512845', 'step': 5799, 'epoch': 1} {'type': 'loss', 'content': 0.1314944624900818, 'timestamp': '2025-10-01 04:19:52.536378', 'step': 5800, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:52.575594', 'step': 5800, 'epoch': 1} {'type': 'loss', 'content': 0.2625921070575714, 'timestamp': '2025-10-01 04:19:52.577710', 'step': 5801, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:19:52.610185', 'step': 5801, 'epoch': 1} {'type': 'loss', 'content': 0.24419382214546204, 'timestamp': '2025-10-01 04:19:52.612321', 'step': 5802, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:52.645496', 'step': 5802, 'epoch': 1} {'type': 'loss', 'content': 0.04121153801679611, 'timestamp': '2025-10-01 04:19:52.647656', 'step': 5803, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:52.679975', 'step': 5803, 'epoch': 1} {'type': 'loss', 'content': 0.11271205544471741, 'timestamp': '2025-10-01 04:19:52.703338', 'step': 5804, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:52.734524', 'step': 5804, 'epoch': 1} {'type': 'loss', 'content': 0.11160913854837418, 'timestamp': '2025-10-01 04:19:52.736532', 'step': 5805, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:19:52.768461', 'step': 5805, 'epoch': 1} {'type': 'loss', 'content': 0.20299479365348816, 'timestamp': '2025-10-01 04:19:52.770952', 'step': 5806, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:52.806184', 'step': 5806, 'epoch': 1} {'type': 'loss', 'content': 0.13936424255371094, 'timestamp': '2025-10-01 04:19:52.816992', 'step': 5807, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:52.848818', 'step': 5807, 'epoch': 1} {'type': 'loss', 'content': 0.19658862054347992, 'timestamp': '2025-10-01 04:19:52.872428', 'step': 5808, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:19:52.903116', 'step': 5808, 'epoch': 1} {'type': 'loss', 'content': 0.14672745764255524, 'timestamp': '2025-10-01 04:19:52.905102', 'step': 5809, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:52.935922', 'step': 5809, 'epoch': 1} {'type': 'loss', 'content': 0.09145064651966095, 'timestamp': '2025-10-01 04:19:52.937974', 'step': 5810, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:52.968767', 'step': 5810, 'epoch': 1} {'type': 'loss', 'content': 0.0610276497900486, 'timestamp': '2025-10-01 04:19:52.971876', 'step': 5811, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:19:53.003635', 'step': 5811, 'epoch': 1} {'type': 'loss', 'content': 0.11488701403141022, 'timestamp': '2025-10-01 04:19:53.027331', 'step': 5812, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:53.065149', 'step': 5812, 'epoch': 1} {'type': 'loss', 'content': 0.15494918823242188, 'timestamp': '2025-10-01 04:19:53.067247', 'step': 5813, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:53.097794', 'step': 5813, 'epoch': 1} {'type': 'loss', 'content': 0.17441867291927338, 'timestamp': '2025-10-01 04:19:53.099885', 'step': 5814, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:53.130966', 'step': 5814, 'epoch': 1} {'type': 'loss', 'content': 0.16356618702411652, 'timestamp': '2025-10-01 04:19:53.133258', 'step': 5815, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:53.164902', 'step': 5815, 'epoch': 1} {'type': 'loss', 'content': 0.1421145796775818, 'timestamp': '2025-10-01 04:19:53.188654', 'step': 5816, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:53.219422', 'step': 5816, 'epoch': 1} {'type': 'loss', 'content': 0.13335759937763214, 'timestamp': '2025-10-01 04:19:53.222046', 'step': 5817, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:53.253580', 'step': 5817, 'epoch': 1} {'type': 'loss', 'content': 0.1500706523656845, 'timestamp': '2025-10-01 04:19:53.255601', 'step': 5818, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:53.286787', 'step': 5818, 'epoch': 1} {'type': 'loss', 'content': 0.07117636501789093, 'timestamp': '2025-10-01 04:19:53.289861', 'step': 5819, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:53.322184', 'step': 5819, 'epoch': 1} {'type': 'loss', 'content': 0.18181516230106354, 'timestamp': '2025-10-01 04:19:53.345717', 'step': 5820, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:53.376112', 'step': 5820, 'epoch': 1} {'type': 'loss', 'content': 0.13391847908496857, 'timestamp': '2025-10-01 04:19:53.379081', 'step': 5821, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:53.414880', 'step': 5821, 'epoch': 1} {'type': 'loss', 'content': 0.21486800909042358, 'timestamp': '2025-10-01 04:19:53.416945', 'step': 5822, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:53.449713', 'step': 5822, 'epoch': 1} {'type': 'loss', 'content': 0.129871666431427, 'timestamp': '2025-10-01 04:19:53.451645', 'step': 5823, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:53.481874', 'step': 5823, 'epoch': 1} {'type': 'loss', 'content': 0.1324896365404129, 'timestamp': '2025-10-01 04:19:53.505299', 'step': 5824, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:53.536130', 'step': 5824, 'epoch': 1} {'type': 'loss', 'content': 0.135224848985672, 'timestamp': '2025-10-01 04:19:53.538372', 'step': 5825, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:19:53.570519', 'step': 5825, 'epoch': 1} {'type': 'loss', 'content': 0.18615755438804626, 'timestamp': '2025-10-01 04:19:53.572973', 'step': 5826, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:53.603390', 'step': 5826, 'epoch': 1} {'type': 'loss', 'content': 0.13504847884178162, 'timestamp': '2025-10-01 04:19:53.605580', 'step': 5827, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:53.638102', 'step': 5827, 'epoch': 1} {'type': 'loss', 'content': 0.09449712932109833, 'timestamp': '2025-10-01 04:19:53.661725', 'step': 5828, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:53.692453', 'step': 5828, 'epoch': 1} {'type': 'loss', 'content': 0.14822404086589813, 'timestamp': '2025-10-01 04:19:53.695241', 'step': 5829, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:53.728553', 'step': 5829, 'epoch': 1} {'type': 'loss', 'content': 0.13972382247447968, 'timestamp': '2025-10-01 04:19:53.730928', 'step': 5830, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:53.763568', 'step': 5830, 'epoch': 1} {'type': 'loss', 'content': 0.18886063992977142, 'timestamp': '2025-10-01 04:19:53.766621', 'step': 5831, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:53.797523', 'step': 5831, 'epoch': 1} {'type': 'loss', 'content': 0.14685514569282532, 'timestamp': '2025-10-01 04:19:53.822103', 'step': 5832, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-10-01 04:19:53.855130', 'step': 5832, 'epoch': 1} {'type': 'loss', 'content': 0.2235451489686966, 'timestamp': '2025-10-01 04:19:53.857514', 'step': 5833, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:53.888617', 'step': 5833, 'epoch': 1} {'type': 'loss', 'content': 0.12718844413757324, 'timestamp': '2025-10-01 04:19:53.890689', 'step': 5834, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:53.920925', 'step': 5834, 'epoch': 1} {'type': 'loss', 'content': 0.08243441581726074, 'timestamp': '2025-10-01 04:19:53.923769', 'step': 5835, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:53.959448', 'step': 5835, 'epoch': 1} {'type': 'loss', 'content': 0.20236320793628693, 'timestamp': '2025-10-01 04:19:53.983139', 'step': 5836, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:54.014550', 'step': 5836, 'epoch': 1} {'type': 'loss', 'content': 0.10666998475790024, 'timestamp': '2025-10-01 04:19:54.016861', 'step': 5837, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:54.048826', 'step': 5837, 'epoch': 1} {'type': 'loss', 'content': 0.15366648137569427, 'timestamp': '2025-10-01 04:19:54.050908', 'step': 5838, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:54.088040', 'step': 5838, 'epoch': 1} {'type': 'loss', 'content': 0.1392829865217209, 'timestamp': '2025-10-01 04:19:54.091045', 'step': 5839, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:19:54.125871', 'step': 5839, 'epoch': 1} {'type': 'loss', 'content': 0.14864221215248108, 'timestamp': '2025-10-01 04:19:54.149448', 'step': 5840, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:19:54.181715', 'step': 5840, 'epoch': 1} {'type': 'loss', 'content': 0.10929720103740692, 'timestamp': '2025-10-01 04:19:54.184376', 'step': 5841, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:54.215567', 'step': 5841, 'epoch': 1} {'type': 'loss', 'content': 0.15241536498069763, 'timestamp': '2025-10-01 04:19:54.217821', 'step': 5842, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:54.249505', 'step': 5842, 'epoch': 1} {'type': 'loss', 'content': 0.08030989021062851, 'timestamp': '2025-10-01 04:19:54.252072', 'step': 5843, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:54.283975', 'step': 5843, 'epoch': 1} {'type': 'loss', 'content': 0.11317114531993866, 'timestamp': '2025-10-01 04:19:54.311116', 'step': 5844, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:19:54.343113', 'step': 5844, 'epoch': 1} {'type': 'loss', 'content': 0.1413167417049408, 'timestamp': '2025-10-01 04:19:54.345257', 'step': 5845, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:19:54.376054', 'step': 5845, 'epoch': 1} {'type': 'loss', 'content': 0.18869854509830475, 'timestamp': '2025-10-01 04:19:54.378316', 'step': 5846, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:19:54.410589', 'step': 5846, 'epoch': 1} {'type': 'loss', 'content': 0.24514001607894897, 'timestamp': '2025-10-01 04:19:54.412545', 'step': 5847, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:54.443244', 'step': 5847, 'epoch': 1} {'type': 'loss', 'content': 0.1985010802745819, 'timestamp': '2025-10-01 04:19:54.466968', 'step': 5848, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:54.497243', 'step': 5848, 'epoch': 1} {'type': 'loss', 'content': 0.2104041427373886, 'timestamp': '2025-10-01 04:19:54.499485', 'step': 5849, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:54.530737', 'step': 5849, 'epoch': 1} {'type': 'loss', 'content': 0.20943544805049896, 'timestamp': '2025-10-01 04:19:54.532858', 'step': 5850, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:19:54.563560', 'step': 5850, 'epoch': 1} {'type': 'loss', 'content': 0.19710667431354523, 'timestamp': '2025-10-01 04:19:54.565925', 'step': 5851, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:54.596300', 'step': 5851, 'epoch': 1} {'type': 'loss', 'content': 0.10699243098497391, 'timestamp': '2025-10-01 04:19:54.620069', 'step': 5852, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:54.653592', 'step': 5852, 'epoch': 1} {'type': 'loss', 'content': 0.17566388845443726, 'timestamp': '2025-10-01 04:19:54.655664', 'step': 5853, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:54.686915', 'step': 5853, 'epoch': 1} {'type': 'loss', 'content': 0.12515002489089966, 'timestamp': '2025-10-01 04:19:54.689009', 'step': 5854, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:54.720837', 'step': 5854, 'epoch': 1} {'type': 'loss', 'content': 0.22399447858333588, 'timestamp': '2025-10-01 04:19:54.723061', 'step': 5855, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:19:54.752897', 'step': 5855, 'epoch': 1} {'type': 'loss', 'content': 0.14652319252490997, 'timestamp': '2025-10-01 04:19:54.776449', 'step': 5856, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:54.808551', 'step': 5856, 'epoch': 1} {'type': 'loss', 'content': 0.255310595035553, 'timestamp': '2025-10-01 04:19:54.810541', 'step': 5857, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:54.844515', 'step': 5857, 'epoch': 1} {'type': 'loss', 'content': 0.14918678998947144, 'timestamp': '2025-10-01 04:19:54.846756', 'step': 5858, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:54.877536', 'step': 5858, 'epoch': 1} {'type': 'loss', 'content': 0.18432293832302094, 'timestamp': '2025-10-01 04:19:54.882975', 'step': 5859, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:19:54.915105', 'step': 5859, 'epoch': 1} {'type': 'loss', 'content': 0.21889041364192963, 'timestamp': '2025-10-01 04:19:54.939943', 'step': 5860, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:54.973026', 'step': 5860, 'epoch': 1} {'type': 'loss', 'content': 0.14946305751800537, 'timestamp': '2025-10-01 04:19:54.975140', 'step': 5861, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:55.006806', 'step': 5861, 'epoch': 1} {'type': 'loss', 'content': 0.177986741065979, 'timestamp': '2025-10-01 04:19:55.009014', 'step': 5862, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:19:55.041658', 'step': 5862, 'epoch': 1} {'type': 'loss', 'content': 0.1471015065908432, 'timestamp': '2025-10-01 04:19:55.043885', 'step': 5863, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:55.075700', 'step': 5863, 'epoch': 1} {'type': 'loss', 'content': 0.0837261825799942, 'timestamp': '2025-10-01 04:19:55.099639', 'step': 5864, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:55.131856', 'step': 5864, 'epoch': 1} {'type': 'loss', 'content': 0.13441672921180725, 'timestamp': '2025-10-01 04:19:55.133909', 'step': 5865, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:55.165686', 'step': 5865, 'epoch': 1} {'type': 'loss', 'content': 0.16636650264263153, 'timestamp': '2025-10-01 04:19:55.167827', 'step': 5866, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:55.198175', 'step': 5866, 'epoch': 1} {'type': 'loss', 'content': 0.17374932765960693, 'timestamp': '2025-10-01 04:19:55.207860', 'step': 5867, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:55.239347', 'step': 5867, 'epoch': 1} {'type': 'loss', 'content': 0.15114550292491913, 'timestamp': '2025-10-01 04:19:55.263030', 'step': 5868, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:55.293921', 'step': 5868, 'epoch': 1} {'type': 'loss', 'content': 0.1554185003042221, 'timestamp': '2025-10-01 04:19:55.296576', 'step': 5869, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:19:55.328091', 'step': 5869, 'epoch': 1} {'type': 'loss', 'content': 0.18403542041778564, 'timestamp': '2025-10-01 04:19:55.331001', 'step': 5870, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:55.362022', 'step': 5870, 'epoch': 1} {'type': 'loss', 'content': 0.1253967583179474, 'timestamp': '2025-10-01 04:19:55.363910', 'step': 5871, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:55.394326', 'step': 5871, 'epoch': 1} {'type': 'loss', 'content': 0.09153145551681519, 'timestamp': '2025-10-01 04:19:55.417914', 'step': 5872, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:55.449873', 'step': 5872, 'epoch': 1} {'type': 'loss', 'content': 0.14727221429347992, 'timestamp': '2025-10-01 04:19:55.452049', 'step': 5873, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:55.482845', 'step': 5873, 'epoch': 1} {'type': 'loss', 'content': 0.09585355967283249, 'timestamp': '2025-10-01 04:19:55.485176', 'step': 5874, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:55.515603', 'step': 5874, 'epoch': 1} {'type': 'loss', 'content': 0.1586134433746338, 'timestamp': '2025-10-01 04:19:55.517987', 'step': 5875, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:55.552116', 'step': 5875, 'epoch': 1} {'type': 'loss', 'content': 0.1517396718263626, 'timestamp': '2025-10-01 04:19:55.575775', 'step': 5876, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:55.605876', 'step': 5876, 'epoch': 1} {'type': 'loss', 'content': 0.10229466110467911, 'timestamp': '2025-10-01 04:19:55.620042', 'step': 5877, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:55.652383', 'step': 5877, 'epoch': 1} {'type': 'loss', 'content': 0.0848812460899353, 'timestamp': '2025-10-01 04:19:55.654586', 'step': 5878, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:55.685863', 'step': 5878, 'epoch': 1} {'type': 'loss', 'content': 0.15670080482959747, 'timestamp': '2025-10-01 04:19:55.688559', 'step': 5879, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:19:55.721190', 'step': 5879, 'epoch': 1} {'type': 'loss', 'content': 0.15014059841632843, 'timestamp': '2025-10-01 04:19:55.744583', 'step': 5880, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:55.776559', 'step': 5880, 'epoch': 1} {'type': 'loss', 'content': 0.25750628113746643, 'timestamp': '2025-10-01 04:19:55.778866', 'step': 5881, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:55.810648', 'step': 5881, 'epoch': 1} {'type': 'loss', 'content': 0.19290363788604736, 'timestamp': '2025-10-01 04:19:55.812754', 'step': 5882, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:19:55.844026', 'step': 5882, 'epoch': 1} {'type': 'loss', 'content': 0.15648192167282104, 'timestamp': '2025-10-01 04:19:55.846442', 'step': 5883, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:55.878391', 'step': 5883, 'epoch': 1} {'type': 'loss', 'content': 0.1816767305135727, 'timestamp': '2025-10-01 04:19:55.902104', 'step': 5884, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:55.933548', 'step': 5884, 'epoch': 1} {'type': 'loss', 'content': 0.06136254593729973, 'timestamp': '2025-10-01 04:19:55.935999', 'step': 5885, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:55.967800', 'step': 5885, 'epoch': 1} {'type': 'loss', 'content': 0.19710637629032135, 'timestamp': '2025-10-01 04:19:55.969975', 'step': 5886, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:56.002458', 'step': 5886, 'epoch': 1} {'type': 'loss', 'content': 0.09956979751586914, 'timestamp': '2025-10-01 04:19:56.004982', 'step': 5887, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:56.035808', 'step': 5887, 'epoch': 1} {'type': 'loss', 'content': 0.149362251162529, 'timestamp': '2025-10-01 04:19:56.060137', 'step': 5888, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:56.094814', 'step': 5888, 'epoch': 1} {'type': 'loss', 'content': 0.30636081099510193, 'timestamp': '2025-10-01 04:19:56.097330', 'step': 5889, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:56.141833', 'step': 5889, 'epoch': 1} {'type': 'loss', 'content': 0.1166490986943245, 'timestamp': '2025-10-01 04:19:56.143686', 'step': 5890, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:19:56.176280', 'step': 5890, 'epoch': 1} {'type': 'loss', 'content': 0.1608259230852127, 'timestamp': '2025-10-01 04:19:56.178608', 'step': 5891, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:56.209484', 'step': 5891, 'epoch': 1} {'type': 'loss', 'content': 0.10993396490812302, 'timestamp': '2025-10-01 04:19:56.233399', 'step': 5892, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:56.264795', 'step': 5892, 'epoch': 1} {'type': 'loss', 'content': 0.09829248487949371, 'timestamp': '2025-10-01 04:19:56.266859', 'step': 5893, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:56.298410', 'step': 5893, 'epoch': 1} {'type': 'loss', 'content': 0.14044244587421417, 'timestamp': '2025-10-01 04:19:56.300401', 'step': 5894, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:56.332915', 'step': 5894, 'epoch': 1} {'type': 'loss', 'content': 0.22558671236038208, 'timestamp': '2025-10-01 04:19:56.334874', 'step': 5895, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:19:56.365900', 'step': 5895, 'epoch': 1} {'type': 'loss', 'content': 0.16050824522972107, 'timestamp': '2025-10-01 04:19:56.389723', 'step': 5896, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:56.421689', 'step': 5896, 'epoch': 1} {'type': 'loss', 'content': 0.20173998177051544, 'timestamp': '2025-10-01 04:19:56.423881', 'step': 5897, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:56.457823', 'step': 5897, 'epoch': 1} {'type': 'loss', 'content': 0.12820090353488922, 'timestamp': '2025-10-01 04:19:56.459961', 'step': 5898, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:19:56.492216', 'step': 5898, 'epoch': 1} {'type': 'loss', 'content': 0.07878582179546356, 'timestamp': '2025-10-01 04:19:56.494367', 'step': 5899, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:56.524634', 'step': 5899, 'epoch': 1} {'type': 'loss', 'content': 0.21227946877479553, 'timestamp': '2025-10-01 04:19:56.548127', 'step': 5900, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:56.579502', 'step': 5900, 'epoch': 1} {'type': 'loss', 'content': 0.11907315254211426, 'timestamp': '2025-10-01 04:19:56.592325', 'step': 5901, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:56.625992', 'step': 5901, 'epoch': 1} {'type': 'loss', 'content': 0.10153515636920929, 'timestamp': '2025-10-01 04:19:56.633045', 'step': 5902, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:56.670442', 'step': 5902, 'epoch': 1} {'type': 'loss', 'content': 0.09892220795154572, 'timestamp': '2025-10-01 04:19:56.672450', 'step': 5903, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:19:56.704700', 'step': 5903, 'epoch': 1} {'type': 'loss', 'content': 0.2687683403491974, 'timestamp': '2025-10-01 04:19:56.728396', 'step': 5904, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:56.758813', 'step': 5904, 'epoch': 1} {'type': 'loss', 'content': 0.16013772785663605, 'timestamp': '2025-10-01 04:19:56.760902', 'step': 5905, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:56.791625', 'step': 5905, 'epoch': 1} {'type': 'loss', 'content': 0.17632420361042023, 'timestamp': '2025-10-01 04:19:56.793645', 'step': 5906, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:56.839158', 'step': 5906, 'epoch': 1} {'type': 'loss', 'content': 0.15123356878757477, 'timestamp': '2025-10-01 04:19:56.841279', 'step': 5907, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:56.872048', 'step': 5907, 'epoch': 1} {'type': 'loss', 'content': 0.16762852668762207, 'timestamp': '2025-10-01 04:19:56.895549', 'step': 5908, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:56.928425', 'step': 5908, 'epoch': 1} {'type': 'loss', 'content': 0.14269106090068817, 'timestamp': '2025-10-01 04:19:56.930958', 'step': 5909, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:56.962757', 'step': 5909, 'epoch': 1} {'type': 'loss', 'content': 0.14431023597717285, 'timestamp': '2025-10-01 04:19:56.964884', 'step': 5910, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:56.997682', 'step': 5910, 'epoch': 1} {'type': 'loss', 'content': 0.17293663322925568, 'timestamp': '2025-10-01 04:19:56.999729', 'step': 5911, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:57.031278', 'step': 5911, 'epoch': 1} {'type': 'loss', 'content': 0.12360893189907074, 'timestamp': '2025-10-01 04:19:57.054812', 'step': 5912, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:57.085912', 'step': 5912, 'epoch': 1} {'type': 'loss', 'content': 0.13506081700325012, 'timestamp': '2025-10-01 04:19:57.088155', 'step': 5913, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:57.122615', 'step': 5913, 'epoch': 1} {'type': 'loss', 'content': 0.2927789092063904, 'timestamp': '2025-10-01 04:19:57.124627', 'step': 5914, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:19:57.158830', 'step': 5914, 'epoch': 1} {'type': 'loss', 'content': 0.2102472484111786, 'timestamp': '2025-10-01 04:19:57.161223', 'step': 5915, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:57.191887', 'step': 5915, 'epoch': 1} {'type': 'loss', 'content': 0.12231621891260147, 'timestamp': '2025-10-01 04:19:57.215485', 'step': 5916, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:57.246851', 'step': 5916, 'epoch': 1} {'type': 'loss', 'content': 0.1838674545288086, 'timestamp': '2025-10-01 04:19:57.251838', 'step': 5917, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:57.285623', 'step': 5917, 'epoch': 1} {'type': 'loss', 'content': 0.13018281757831573, 'timestamp': '2025-10-01 04:19:57.287526', 'step': 5918, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:57.317859', 'step': 5918, 'epoch': 1} {'type': 'loss', 'content': 0.1978476196527481, 'timestamp': '2025-10-01 04:19:57.319979', 'step': 5919, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:57.349795', 'step': 5919, 'epoch': 1} {'type': 'loss', 'content': 0.1806831806898117, 'timestamp': '2025-10-01 04:19:57.373350', 'step': 5920, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:57.406028', 'step': 5920, 'epoch': 1} {'type': 'loss', 'content': 0.15629619359970093, 'timestamp': '2025-10-01 04:19:57.408046', 'step': 5921, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:19:57.439183', 'step': 5921, 'epoch': 1} {'type': 'loss', 'content': 0.21227259933948517, 'timestamp': '2025-10-01 04:19:57.441214', 'step': 5922, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:57.472687', 'step': 5922, 'epoch': 1} {'type': 'loss', 'content': 0.23466630280017853, 'timestamp': '2025-10-01 04:19:57.486244', 'step': 5923, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:57.518362', 'step': 5923, 'epoch': 1} {'type': 'loss', 'content': 0.1344911754131317, 'timestamp': '2025-10-01 04:19:57.541814', 'step': 5924, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:57.573099', 'step': 5924, 'epoch': 1} {'type': 'loss', 'content': 0.09030643850564957, 'timestamp': '2025-10-01 04:19:57.575204', 'step': 5925, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:57.605617', 'step': 5925, 'epoch': 1} {'type': 'loss', 'content': 0.13035370409488678, 'timestamp': '2025-10-01 04:19:57.607800', 'step': 5926, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:57.638395', 'step': 5926, 'epoch': 1} {'type': 'loss', 'content': 0.07384832203388214, 'timestamp': '2025-10-01 04:19:57.650760', 'step': 5927, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:57.682711', 'step': 5927, 'epoch': 1} {'type': 'loss', 'content': 0.0708354189991951, 'timestamp': '2025-10-01 04:19:57.707264', 'step': 5928, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:57.737687', 'step': 5928, 'epoch': 1} {'type': 'loss', 'content': 0.09753717482089996, 'timestamp': '2025-10-01 04:19:57.739777', 'step': 5929, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:57.770348', 'step': 5929, 'epoch': 1} {'type': 'loss', 'content': 0.056051645427942276, 'timestamp': '2025-10-01 04:19:57.772655', 'step': 5930, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:57.803859', 'step': 5930, 'epoch': 1} {'type': 'loss', 'content': 0.16017501056194305, 'timestamp': '2025-10-01 04:19:57.806267', 'step': 5931, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:57.838428', 'step': 5931, 'epoch': 1} {'type': 'loss', 'content': 0.17347824573516846, 'timestamp': '2025-10-01 04:19:57.862002', 'step': 5932, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:57.892215', 'step': 5932, 'epoch': 1} {'type': 'loss', 'content': 0.24178092181682587, 'timestamp': '2025-10-01 04:19:57.894389', 'step': 5933, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:57.925072', 'step': 5933, 'epoch': 1} {'type': 'loss', 'content': 0.1282031387090683, 'timestamp': '2025-10-01 04:19:57.927133', 'step': 5934, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:57.958186', 'step': 5934, 'epoch': 1} {'type': 'loss', 'content': 0.17033225297927856, 'timestamp': '2025-10-01 04:19:57.960234', 'step': 5935, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:57.991288', 'step': 5935, 'epoch': 1} {'type': 'loss', 'content': 0.1417827606201172, 'timestamp': '2025-10-01 04:19:58.020604', 'step': 5936, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:19:58.053513', 'step': 5936, 'epoch': 1} {'type': 'loss', 'content': 0.09820602089166641, 'timestamp': '2025-10-01 04:19:58.055541', 'step': 5937, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:58.086246', 'step': 5937, 'epoch': 1} {'type': 'loss', 'content': 0.18416818976402283, 'timestamp': '2025-10-01 04:19:58.088232', 'step': 5938, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:58.121354', 'step': 5938, 'epoch': 1} {'type': 'loss', 'content': 0.14848431944847107, 'timestamp': '2025-10-01 04:19:58.123485', 'step': 5939, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:58.154254', 'step': 5939, 'epoch': 1} {'type': 'loss', 'content': 0.3384638726711273, 'timestamp': '2025-10-01 04:19:58.177920', 'step': 5940, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:58.208297', 'step': 5940, 'epoch': 1} {'type': 'loss', 'content': 0.19885902106761932, 'timestamp': '2025-10-01 04:19:58.210758', 'step': 5941, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:58.242691', 'step': 5941, 'epoch': 1} {'type': 'loss', 'content': 0.16043148934841156, 'timestamp': '2025-10-01 04:19:58.244605', 'step': 5942, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:19:58.275352', 'step': 5942, 'epoch': 1} {'type': 'loss', 'content': 0.07223345339298248, 'timestamp': '2025-10-01 04:19:58.278048', 'step': 5943, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:58.313964', 'step': 5943, 'epoch': 1} {'type': 'loss', 'content': 0.1569722294807434, 'timestamp': '2025-10-01 04:19:58.337534', 'step': 5944, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:19:58.369336', 'step': 5944, 'epoch': 1} {'type': 'loss', 'content': 0.25556129217147827, 'timestamp': '2025-10-01 04:19:58.371636', 'step': 5945, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:19:58.402338', 'step': 5945, 'epoch': 1} {'type': 'loss', 'content': 0.11889699846506119, 'timestamp': '2025-10-01 04:19:58.404978', 'step': 5946, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:19:58.435245', 'step': 5946, 'epoch': 1} {'type': 'loss', 'content': 0.13962604105472565, 'timestamp': '2025-10-01 04:19:58.437710', 'step': 5947, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:58.467932', 'step': 5947, 'epoch': 1} {'type': 'loss', 'content': 0.18680353462696075, 'timestamp': '2025-10-01 04:19:58.491442', 'step': 5948, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:58.526525', 'step': 5948, 'epoch': 1} {'type': 'loss', 'content': 0.18382415175437927, 'timestamp': '2025-10-01 04:19:58.528539', 'step': 5949, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:58.560071', 'step': 5949, 'epoch': 1} {'type': 'loss', 'content': 0.1797134429216385, 'timestamp': '2025-10-01 04:19:58.563433', 'step': 5950, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:19:58.595682', 'step': 5950, 'epoch': 1} {'type': 'loss', 'content': 0.17287196218967438, 'timestamp': '2025-10-01 04:19:58.597675', 'step': 5951, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:58.628110', 'step': 5951, 'epoch': 1} {'type': 'loss', 'content': 0.14963920414447784, 'timestamp': '2025-10-01 04:19:58.651630', 'step': 5952, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:58.684595', 'step': 5952, 'epoch': 1} {'type': 'loss', 'content': 0.16413770616054535, 'timestamp': '2025-10-01 04:19:58.686603', 'step': 5953, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:58.718729', 'step': 5953, 'epoch': 1} {'type': 'loss', 'content': 0.11592023074626923, 'timestamp': '2025-10-01 04:19:58.720643', 'step': 5954, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:58.754013', 'step': 5954, 'epoch': 1} {'type': 'loss', 'content': 0.11346809566020966, 'timestamp': '2025-10-01 04:19:58.756161', 'step': 5955, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:58.786715', 'step': 5955, 'epoch': 1} {'type': 'loss', 'content': 0.11539925634860992, 'timestamp': '2025-10-01 04:19:58.810358', 'step': 5956, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:58.840941', 'step': 5956, 'epoch': 1} {'type': 'loss', 'content': 0.13133138418197632, 'timestamp': '2025-10-01 04:19:58.843316', 'step': 5957, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:58.876461', 'step': 5957, 'epoch': 1} {'type': 'loss', 'content': 0.17507565021514893, 'timestamp': '2025-10-01 04:19:58.878394', 'step': 5958, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:58.909677', 'step': 5958, 'epoch': 1} {'type': 'loss', 'content': 0.18966256082057953, 'timestamp': '2025-10-01 04:19:58.911643', 'step': 5959, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:58.942372', 'step': 5959, 'epoch': 1} {'type': 'loss', 'content': 0.08040166646242142, 'timestamp': '2025-10-01 04:19:58.965895', 'step': 5960, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:58.996455', 'step': 5960, 'epoch': 1} {'type': 'loss', 'content': 0.2001871019601822, 'timestamp': '2025-10-01 04:19:58.998519', 'step': 5961, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:59.035925', 'step': 5961, 'epoch': 1} {'type': 'loss', 'content': 0.1344134360551834, 'timestamp': '2025-10-01 04:19:59.037937', 'step': 5962, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:59.070001', 'step': 5962, 'epoch': 1} {'type': 'loss', 'content': 0.09043493121862411, 'timestamp': '2025-10-01 04:19:59.072013', 'step': 5963, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:59.103216', 'step': 5963, 'epoch': 1} {'type': 'loss', 'content': 0.07801427692174911, 'timestamp': '2025-10-01 04:19:59.127224', 'step': 5964, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:59.160945', 'step': 5964, 'epoch': 1} {'type': 'loss', 'content': 0.1148705780506134, 'timestamp': '2025-10-01 04:19:59.162981', 'step': 5965, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:59.202034', 'step': 5965, 'epoch': 1} {'type': 'loss', 'content': 0.122196264564991, 'timestamp': '2025-10-01 04:19:59.203990', 'step': 5966, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:59.235485', 'step': 5966, 'epoch': 1} {'type': 'loss', 'content': 0.10682875663042068, 'timestamp': '2025-10-01 04:19:59.244615', 'step': 5967, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:19:59.277968', 'step': 5967, 'epoch': 1} {'type': 'loss', 'content': 0.1363973468542099, 'timestamp': '2025-10-01 04:19:59.301707', 'step': 5968, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:19:59.333340', 'step': 5968, 'epoch': 1} {'type': 'loss', 'content': 0.0945720225572586, 'timestamp': '2025-10-01 04:19:59.335877', 'step': 5969, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:59.368688', 'step': 5969, 'epoch': 1} {'type': 'loss', 'content': 0.14027567207813263, 'timestamp': '2025-10-01 04:19:59.370806', 'step': 5970, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:19:59.402270', 'step': 5970, 'epoch': 1} {'type': 'loss', 'content': 0.11596495658159256, 'timestamp': '2025-10-01 04:19:59.406041', 'step': 5971, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-10-01 04:19:59.440936', 'step': 5971, 'epoch': 1} {'type': 'loss', 'content': 0.11326151341199875, 'timestamp': '2025-10-01 04:19:59.473584', 'step': 5972, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:59.506885', 'step': 5972, 'epoch': 1} {'type': 'loss', 'content': 0.1630052626132965, 'timestamp': '2025-10-01 04:19:59.508890', 'step': 5973, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:59.542691', 'step': 5973, 'epoch': 1} {'type': 'loss', 'content': 0.1134757325053215, 'timestamp': '2025-10-01 04:19:59.544481', 'step': 5974, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:59.579028', 'step': 5974, 'epoch': 1} {'type': 'loss', 'content': 0.22018447518348694, 'timestamp': '2025-10-01 04:19:59.581768', 'step': 5975, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:59.616229', 'step': 5975, 'epoch': 1} {'type': 'loss', 'content': 0.14465059340000153, 'timestamp': '2025-10-01 04:19:59.639912', 'step': 5976, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:59.676108', 'step': 5976, 'epoch': 1} {'type': 'loss', 'content': 0.1319880485534668, 'timestamp': '2025-10-01 04:19:59.678641', 'step': 5977, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:59.709526', 'step': 5977, 'epoch': 1} {'type': 'loss', 'content': 0.12243489921092987, 'timestamp': '2025-10-01 04:19:59.711742', 'step': 5978, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:19:59.748446', 'step': 5978, 'epoch': 1} {'type': 'loss', 'content': 0.22124303877353668, 'timestamp': '2025-10-01 04:19:59.750409', 'step': 5979, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-10-01 04:19:59.784528', 'step': 5979, 'epoch': 1} {'type': 'loss', 'content': 0.14794081449508667, 'timestamp': '2025-10-01 04:19:59.810283', 'step': 5980, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:19:59.842687', 'step': 5980, 'epoch': 1} {'type': 'loss', 'content': 0.16429618000984192, 'timestamp': '2025-10-01 04:19:59.845491', 'step': 5981, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:19:59.880686', 'step': 5981, 'epoch': 1} {'type': 'loss', 'content': 0.1685493141412735, 'timestamp': '2025-10-01 04:19:59.883879', 'step': 5982, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:19:59.927149', 'step': 5982, 'epoch': 1} {'type': 'loss', 'content': 0.19132007658481598, 'timestamp': '2025-10-01 04:19:59.929135', 'step': 5983, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:19:59.964760', 'step': 5983, 'epoch': 1} {'type': 'loss', 'content': 0.25826284289360046, 'timestamp': '2025-10-01 04:19:59.988217', 'step': 5984, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:20:00.022321', 'step': 5984, 'epoch': 1} {'type': 'loss', 'content': 0.14877860248088837, 'timestamp': '2025-10-01 04:20:00.024482', 'step': 5985, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:00.058557', 'step': 5985, 'epoch': 1} {'type': 'loss', 'content': 0.0935952216386795, 'timestamp': '2025-10-01 04:20:00.060657', 'step': 5986, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:20:00.092840', 'step': 5986, 'epoch': 1} {'type': 'loss', 'content': 0.1381572037935257, 'timestamp': '2025-10-01 04:20:00.097053', 'step': 5987, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:20:00.130685', 'step': 5987, 'epoch': 1} {'type': 'loss', 'content': 0.15158772468566895, 'timestamp': '2025-10-01 04:20:00.156096', 'step': 5988, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:20:00.187868', 'step': 5988, 'epoch': 1} {'type': 'loss', 'content': 0.12287063151597977, 'timestamp': '2025-10-01 04:20:00.189904', 'step': 5989, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:20:00.231787', 'step': 5989, 'epoch': 1} {'type': 'loss', 'content': 0.19485823810100555, 'timestamp': '2025-10-01 04:20:00.234190', 'step': 5990, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:20:00.272484', 'step': 5990, 'epoch': 1} {'type': 'loss', 'content': 0.11645560711622238, 'timestamp': '2025-10-01 04:20:00.274631', 'step': 5991, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:20:00.307925', 'step': 5991, 'epoch': 1} {'type': 'loss', 'content': 0.07997538149356842, 'timestamp': '2025-10-01 04:20:00.331487', 'step': 5992, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:00.369518', 'step': 5992, 'epoch': 1} {'type': 'loss', 'content': 0.19692440330982208, 'timestamp': '2025-10-01 04:20:00.371665', 'step': 5993, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:20:00.405831', 'step': 5993, 'epoch': 1} {'type': 'loss', 'content': 0.18650084733963013, 'timestamp': '2025-10-01 04:20:00.407933', 'step': 5994, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:20:00.443448', 'step': 5994, 'epoch': 1} {'type': 'loss', 'content': 0.24384649097919464, 'timestamp': '2025-10-01 04:20:00.445365', 'step': 5995, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:20:00.477262', 'step': 5995, 'epoch': 1} {'type': 'loss', 'content': 0.152040496468544, 'timestamp': '2025-10-01 04:20:00.509244', 'step': 5996, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:20:00.540854', 'step': 5996, 'epoch': 1} {'type': 'loss', 'content': 0.15399070084095, 'timestamp': '2025-10-01 04:20:00.542766', 'step': 5997, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:00.572195', 'step': 5997, 'epoch': 1} {'type': 'loss', 'content': 0.1471284031867981, 'timestamp': '2025-10-01 04:20:00.574219', 'step': 5998, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:20:00.607745', 'step': 5998, 'epoch': 1} {'type': 'loss', 'content': 0.2190193086862564, 'timestamp': '2025-10-01 04:20:00.609912', 'step': 5999, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:20:00.642576', 'step': 5999, 'epoch': 1} {'type': 'loss', 'content': 0.2456466257572174, 'timestamp': '2025-10-01 04:20:00.674065', 'step': 6000, 'epoch': 1} {'type': 'info', 'content': 'Checkpoint saved at step 6000', 'timestamp': '2025-10-01 04:20:06.172373', 'step': 6000, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:20:06.213816', 'step': 6000, 'epoch': 1} {'type': 'loss', 'content': 0.1492803990840912, 'timestamp': '2025-10-01 04:20:06.215850', 'step': 6001, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:06.248326', 'step': 6001, 'epoch': 1} {'type': 'loss', 'content': 0.2055148333311081, 'timestamp': '2025-10-01 04:20:06.251295', 'step': 6002, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:20:06.288849', 'step': 6002, 'epoch': 1} {'type': 'loss', 'content': 0.16680440306663513, 'timestamp': '2025-10-01 04:20:06.290892', 'step': 6003, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:20:06.323869', 'step': 6003, 'epoch': 1} {'type': 'loss', 'content': 0.1174389198422432, 'timestamp': '2025-10-01 04:20:06.347707', 'step': 6004, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:20:06.378154', 'step': 6004, 'epoch': 1} {'type': 'loss', 'content': 0.1486230492591858, 'timestamp': '2025-10-01 04:20:06.380092', 'step': 6005, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:20:06.410499', 'step': 6005, 'epoch': 1} {'type': 'loss', 'content': 0.08384909480810165, 'timestamp': '2025-10-01 04:20:06.412553', 'step': 6006, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:06.442957', 'step': 6006, 'epoch': 1} {'type': 'loss', 'content': 0.23299631476402283, 'timestamp': '2025-10-01 04:20:06.445608', 'step': 6007, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:20:06.476072', 'step': 6007, 'epoch': 1} {'type': 'loss', 'content': 0.09202713519334793, 'timestamp': '2025-10-01 04:20:06.499662', 'step': 6008, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:06.530196', 'step': 6008, 'epoch': 1} {'type': 'loss', 'content': 0.1121109277009964, 'timestamp': '2025-10-01 04:20:06.532216', 'step': 6009, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:06.563278', 'step': 6009, 'epoch': 1} {'type': 'loss', 'content': 0.13280928134918213, 'timestamp': '2025-10-01 04:20:06.565456', 'step': 6010, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:06.597293', 'step': 6010, 'epoch': 1} {'type': 'loss', 'content': 0.20766125619411469, 'timestamp': '2025-10-01 04:20:06.599407', 'step': 6011, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:20:06.630264', 'step': 6011, 'epoch': 1} {'type': 'loss', 'content': 0.11601486802101135, 'timestamp': '2025-10-01 04:20:06.653879', 'step': 6012, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:20:06.684805', 'step': 6012, 'epoch': 1} {'type': 'loss', 'content': 0.18950383365154266, 'timestamp': '2025-10-01 04:20:06.687030', 'step': 6013, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:20:06.719365', 'step': 6013, 'epoch': 1} {'type': 'loss', 'content': 0.17089183628559113, 'timestamp': '2025-10-01 04:20:06.721439', 'step': 6014, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:20:06.753019', 'step': 6014, 'epoch': 1} {'type': 'loss', 'content': 0.103867307305336, 'timestamp': '2025-10-01 04:20:06.757290', 'step': 6015, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:06.789572', 'step': 6015, 'epoch': 1} {'type': 'loss', 'content': 0.18462486565113068, 'timestamp': '2025-10-01 04:20:06.813000', 'step': 6016, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:06.844218', 'step': 6016, 'epoch': 1} {'type': 'loss', 'content': 0.14297577738761902, 'timestamp': '2025-10-01 04:20:06.846495', 'step': 6017, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:06.876863', 'step': 6017, 'epoch': 1} {'type': 'loss', 'content': 0.09159127622842789, 'timestamp': '2025-10-01 04:20:06.879314', 'step': 6018, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:20:06.913229', 'step': 6018, 'epoch': 1} {'type': 'loss', 'content': 0.13742415606975555, 'timestamp': '2025-10-01 04:20:06.915237', 'step': 6019, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:06.946152', 'step': 6019, 'epoch': 1} {'type': 'loss', 'content': 0.2350100576877594, 'timestamp': '2025-10-01 04:20:06.970267', 'step': 6020, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:20:07.002455', 'step': 6020, 'epoch': 1} {'type': 'loss', 'content': 0.19757366180419922, 'timestamp': '2025-10-01 04:20:07.004443', 'step': 6021, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:20:07.034803', 'step': 6021, 'epoch': 1} {'type': 'loss', 'content': 0.10001827776432037, 'timestamp': '2025-10-01 04:20:07.036977', 'step': 6022, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:20:07.068693', 'step': 6022, 'epoch': 1} {'type': 'loss', 'content': 0.11977240443229675, 'timestamp': '2025-10-01 04:20:07.072243', 'step': 6023, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:20:07.103294', 'step': 6023, 'epoch': 1} {'type': 'loss', 'content': 0.1313575953245163, 'timestamp': '2025-10-01 04:20:07.126913', 'step': 6024, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:20:07.159041', 'step': 6024, 'epoch': 1} {'type': 'loss', 'content': 0.11302643269300461, 'timestamp': '2025-10-01 04:20:07.161055', 'step': 6025, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:20:07.191892', 'step': 6025, 'epoch': 1} {'type': 'loss', 'content': 0.13124972581863403, 'timestamp': '2025-10-01 04:20:07.193922', 'step': 6026, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:07.225514', 'step': 6026, 'epoch': 1} {'type': 'loss', 'content': 0.0900319442152977, 'timestamp': '2025-10-01 04:20:07.227534', 'step': 6027, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:07.262191', 'step': 6027, 'epoch': 1} {'type': 'loss', 'content': 0.15549783408641815, 'timestamp': '2025-10-01 04:20:07.286079', 'step': 6028, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:07.319379', 'step': 6028, 'epoch': 1} {'type': 'loss', 'content': 0.16613134741783142, 'timestamp': '2025-10-01 04:20:07.322499', 'step': 6029, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:20:07.354499', 'step': 6029, 'epoch': 1} {'type': 'loss', 'content': 0.14758189022541046, 'timestamp': '2025-10-01 04:20:07.356562', 'step': 6030, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:07.388562', 'step': 6030, 'epoch': 1} {'type': 'loss', 'content': 0.0850314125418663, 'timestamp': '2025-10-01 04:20:07.390556', 'step': 6031, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:20:07.421800', 'step': 6031, 'epoch': 1} {'type': 'loss', 'content': 0.1512584090232849, 'timestamp': '2025-10-01 04:20:07.446080', 'step': 6032, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:07.487512', 'step': 6032, 'epoch': 1} {'type': 'loss', 'content': 0.16379909217357635, 'timestamp': '2025-10-01 04:20:07.489562', 'step': 6033, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:07.520954', 'step': 6033, 'epoch': 1} {'type': 'loss', 'content': 0.17350327968597412, 'timestamp': '2025-10-01 04:20:07.523346', 'step': 6034, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:20:07.555195', 'step': 6034, 'epoch': 1} {'type': 'loss', 'content': 0.226918026804924, 'timestamp': '2025-10-01 04:20:07.557675', 'step': 6035, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:07.589617', 'step': 6035, 'epoch': 1} {'type': 'loss', 'content': 0.25492072105407715, 'timestamp': '2025-10-01 04:20:07.613293', 'step': 6036, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:20:07.649107', 'step': 6036, 'epoch': 1} {'type': 'loss', 'content': 0.16386198997497559, 'timestamp': '2025-10-01 04:20:07.651204', 'step': 6037, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:20:07.681628', 'step': 6037, 'epoch': 1} {'type': 'loss', 'content': 0.14023730158805847, 'timestamp': '2025-10-01 04:20:07.683909', 'step': 6038, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:07.727069', 'step': 6038, 'epoch': 1} {'type': 'loss', 'content': 0.15835627913475037, 'timestamp': '2025-10-01 04:20:07.729069', 'step': 6039, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:07.760264', 'step': 6039, 'epoch': 1} {'type': 'loss', 'content': 0.09504858404397964, 'timestamp': '2025-10-01 04:20:07.784247', 'step': 6040, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:20:07.815186', 'step': 6040, 'epoch': 1} {'type': 'loss', 'content': 0.162775456905365, 'timestamp': '2025-10-01 04:20:07.823089', 'step': 6041, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:07.853269', 'step': 6041, 'epoch': 1} {'type': 'loss', 'content': 0.1663297861814499, 'timestamp': '2025-10-01 04:20:07.855443', 'step': 6042, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:20:07.886687', 'step': 6042, 'epoch': 1} {'type': 'loss', 'content': 0.2146061807870865, 'timestamp': '2025-10-01 04:20:07.888810', 'step': 6043, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:07.919593', 'step': 6043, 'epoch': 1} {'type': 'loss', 'content': 0.09554652869701385, 'timestamp': '2025-10-01 04:20:07.943396', 'step': 6044, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:20:07.975258', 'step': 6044, 'epoch': 1} {'type': 'loss', 'content': 0.13101018965244293, 'timestamp': '2025-10-01 04:20:07.977415', 'step': 6045, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:20:08.008412', 'step': 6045, 'epoch': 1} {'type': 'loss', 'content': 0.1367412656545639, 'timestamp': '2025-10-01 04:20:08.010426', 'step': 6046, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:08.043458', 'step': 6046, 'epoch': 1} {'type': 'loss', 'content': 0.08643580228090286, 'timestamp': '2025-10-01 04:20:08.045453', 'step': 6047, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:08.099292', 'step': 6047, 'epoch': 1} {'type': 'loss', 'content': 0.12145364284515381, 'timestamp': '2025-10-01 04:20:08.123265', 'step': 6048, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:08.155095', 'step': 6048, 'epoch': 1} {'type': 'loss', 'content': 0.07181298732757568, 'timestamp': '2025-10-01 04:20:08.157265', 'step': 6049, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:20:08.199595', 'step': 6049, 'epoch': 1} {'type': 'loss', 'content': 0.153123676776886, 'timestamp': '2025-10-01 04:20:08.201866', 'step': 6050, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:20:08.233999', 'step': 6050, 'epoch': 1} {'type': 'loss', 'content': 0.2416207492351532, 'timestamp': '2025-10-01 04:20:08.236845', 'step': 6051, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:08.270164', 'step': 6051, 'epoch': 1} {'type': 'loss', 'content': 0.12853731215000153, 'timestamp': '2025-10-01 04:20:08.294138', 'step': 6052, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:20:08.326693', 'step': 6052, 'epoch': 1} {'type': 'loss', 'content': 0.09438731521368027, 'timestamp': '2025-10-01 04:20:08.328976', 'step': 6053, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:20:08.361004', 'step': 6053, 'epoch': 1} {'type': 'loss', 'content': 0.09621171653270721, 'timestamp': '2025-10-01 04:20:08.363015', 'step': 6054, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:20:08.394384', 'step': 6054, 'epoch': 1} {'type': 'loss', 'content': 0.1621439903974533, 'timestamp': '2025-10-01 04:20:08.396312', 'step': 6055, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:20:08.428031', 'step': 6055, 'epoch': 1} {'type': 'loss', 'content': 0.1969301998615265, 'timestamp': '2025-10-01 04:20:08.451478', 'step': 6056, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:20:08.483146', 'step': 6056, 'epoch': 1} {'type': 'loss', 'content': 0.09531843662261963, 'timestamp': '2025-10-01 04:20:08.485317', 'step': 6057, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:08.516005', 'step': 6057, 'epoch': 1} {'type': 'loss', 'content': 0.17268866300582886, 'timestamp': '2025-10-01 04:20:08.518121', 'step': 6058, 'epoch': 1} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 949202279808}], 'timestamp': '2025-10-01 04:20:17.269275', 'step': 6058, 'epoch': 1} {'type': 'pplx', 'content': 8293.723486767516, 'timestamp': '2025-10-01 04:20:17.272132', 'step': 6058, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:20:17.302038', 'step': 6058, 'epoch': 1} {'type': 'loss', 'content': 0.32019779086112976, 'timestamp': '2025-10-01 04:20:17.304648', 'step': 6059, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:20:17.335377', 'step': 6059, 'epoch': 1} {'type': 'loss', 'content': 0.14585326611995697, 'timestamp': '2025-10-01 04:20:17.359185', 'step': 6060, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:20:17.393156', 'step': 6060, 'epoch': 1} {'type': 'loss', 'content': 0.17047789692878723, 'timestamp': '2025-10-01 04:20:17.397433', 'step': 6061, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:20:17.428561', 'step': 6061, 'epoch': 1} {'type': 'loss', 'content': 0.19550779461860657, 'timestamp': '2025-10-01 04:20:17.432802', 'step': 6062, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:20:17.464472', 'step': 6062, 'epoch': 1} {'type': 'loss', 'content': 0.06510791182518005, 'timestamp': '2025-10-01 04:20:17.466632', 'step': 6063, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:20:17.497571', 'step': 6063, 'epoch': 1} {'type': 'loss', 'content': 0.09121106564998627, 'timestamp': '2025-10-01 04:20:17.521015', 'step': 6064, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:20:17.551623', 'step': 6064, 'epoch': 1} {'type': 'loss', 'content': 0.19760510325431824, 'timestamp': '2025-10-01 04:20:17.556777', 'step': 6065, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:20:17.596137', 'step': 6065, 'epoch': 1} {'type': 'loss', 'content': 0.08454856276512146, 'timestamp': '2025-10-01 04:20:17.598420', 'step': 6066, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:20:17.629513', 'step': 6066, 'epoch': 1} {'type': 'loss', 'content': 0.1696772575378418, 'timestamp': '2025-10-01 04:20:17.631880', 'step': 6067, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:20:17.663868', 'step': 6067, 'epoch': 1} {'type': 'loss', 'content': 0.11233381181955338, 'timestamp': '2025-10-01 04:20:17.687448', 'step': 6068, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:20:17.720520', 'step': 6068, 'epoch': 1} {'type': 'loss', 'content': 0.19822390377521515, 'timestamp': '2025-10-01 04:20:17.722869', 'step': 6069, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:20:17.755877', 'step': 6069, 'epoch': 1} {'type': 'loss', 'content': 0.1241680458188057, 'timestamp': '2025-10-01 04:20:17.758075', 'step': 6070, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:20:17.792951', 'step': 6070, 'epoch': 1} {'type': 'loss', 'content': 0.2286795675754547, 'timestamp': '2025-10-01 04:20:17.795619', 'step': 6071, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:17.827755', 'step': 6071, 'epoch': 1} {'type': 'loss', 'content': 0.12768524885177612, 'timestamp': '2025-10-01 04:20:17.851266', 'step': 6072, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:20:17.882278', 'step': 6072, 'epoch': 1} {'type': 'loss', 'content': 0.12526974081993103, 'timestamp': '2025-10-01 04:20:17.884367', 'step': 6073, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:17.914840', 'step': 6073, 'epoch': 1} {'type': 'loss', 'content': 0.12827670574188232, 'timestamp': '2025-10-01 04:20:17.917372', 'step': 6074, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:17.950549', 'step': 6074, 'epoch': 1} {'type': 'loss', 'content': 0.1482873409986496, 'timestamp': '2025-10-01 04:20:17.952661', 'step': 6075, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:20:17.984916', 'step': 6075, 'epoch': 1} {'type': 'loss', 'content': 0.20914044976234436, 'timestamp': '2025-10-01 04:20:18.015869', 'step': 6076, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:20:18.045705', 'step': 6076, 'epoch': 1} {'type': 'loss', 'content': 0.08954942971467972, 'timestamp': '2025-10-01 04:20:18.047629', 'step': 6077, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:18.077897', 'step': 6077, 'epoch': 1} {'type': 'loss', 'content': 0.1080695390701294, 'timestamp': '2025-10-01 04:20:18.080001', 'step': 6078, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:18.110468', 'step': 6078, 'epoch': 1} {'type': 'loss', 'content': 0.1421632468700409, 'timestamp': '2025-10-01 04:20:18.112466', 'step': 6079, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:18.145844', 'step': 6079, 'epoch': 1} {'type': 'loss', 'content': 0.1772087961435318, 'timestamp': '2025-10-01 04:20:18.170443', 'step': 6080, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:20:18.201445', 'step': 6080, 'epoch': 1} {'type': 'loss', 'content': 0.20968283712863922, 'timestamp': '2025-10-01 04:20:18.204051', 'step': 6081, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:20:18.234946', 'step': 6081, 'epoch': 1} {'type': 'loss', 'content': 0.09247586131095886, 'timestamp': '2025-10-01 04:20:18.237420', 'step': 6082, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:20:18.269401', 'step': 6082, 'epoch': 1} {'type': 'loss', 'content': 0.15055085718631744, 'timestamp': '2025-10-01 04:20:18.271312', 'step': 6083, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:18.304195', 'step': 6083, 'epoch': 1} {'type': 'loss', 'content': 0.1257370263338089, 'timestamp': '2025-10-01 04:20:18.327659', 'step': 6084, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:20:18.357617', 'step': 6084, 'epoch': 1} {'type': 'loss', 'content': 0.19431079924106598, 'timestamp': '2025-10-01 04:20:18.359640', 'step': 6085, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:18.391123', 'step': 6085, 'epoch': 1} {'type': 'loss', 'content': 0.18474707007408142, 'timestamp': '2025-10-01 04:20:18.393036', 'step': 6086, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:20:18.424656', 'step': 6086, 'epoch': 1} {'type': 'loss', 'content': 0.29601573944091797, 'timestamp': '2025-10-01 04:20:18.427368', 'step': 6087, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:20:18.458846', 'step': 6087, 'epoch': 1} {'type': 'loss', 'content': 0.20860303938388824, 'timestamp': '2025-10-01 04:20:18.482406', 'step': 6088, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:18.518375', 'step': 6088, 'epoch': 1} {'type': 'loss', 'content': 0.0792737826704979, 'timestamp': '2025-10-01 04:20:18.520646', 'step': 6089, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:18.556352', 'step': 6089, 'epoch': 1} {'type': 'loss', 'content': 0.14790545403957367, 'timestamp': '2025-10-01 04:20:18.558273', 'step': 6090, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:20:18.591716', 'step': 6090, 'epoch': 1} {'type': 'loss', 'content': 0.13000737130641937, 'timestamp': '2025-10-01 04:20:18.593788', 'step': 6091, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:20:18.628025', 'step': 6091, 'epoch': 1} {'type': 'loss', 'content': 0.2097475677728653, 'timestamp': '2025-10-01 04:20:18.651847', 'step': 6092, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:18.683909', 'step': 6092, 'epoch': 1} {'type': 'loss', 'content': 0.11041643470525742, 'timestamp': '2025-10-01 04:20:18.685866', 'step': 6093, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:18.720117', 'step': 6093, 'epoch': 1} {'type': 'loss', 'content': 0.11492404341697693, 'timestamp': '2025-10-01 04:20:18.722100', 'step': 6094, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:18.757745', 'step': 6094, 'epoch': 1} {'type': 'loss', 'content': 0.24463678896427155, 'timestamp': '2025-10-01 04:20:18.760062', 'step': 6095, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:18.797541', 'step': 6095, 'epoch': 1} {'type': 'loss', 'content': 0.22755493223667145, 'timestamp': '2025-10-01 04:20:18.821332', 'step': 6096, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:20:18.854271', 'step': 6096, 'epoch': 1} {'type': 'loss', 'content': 0.12992458045482635, 'timestamp': '2025-10-01 04:20:18.856313', 'step': 6097, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:18.888295', 'step': 6097, 'epoch': 1} {'type': 'loss', 'content': 0.12466727942228317, 'timestamp': '2025-10-01 04:20:18.890155', 'step': 6098, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:20:18.925603', 'step': 6098, 'epoch': 1} {'type': 'loss', 'content': 0.1378656029701233, 'timestamp': '2025-10-01 04:20:18.935333', 'step': 6099, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:20:18.978952', 'step': 6099, 'epoch': 1} {'type': 'loss', 'content': 0.123626708984375, 'timestamp': '2025-10-01 04:20:19.006756', 'step': 6100, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:20:19.042025', 'step': 6100, 'epoch': 1} {'type': 'loss', 'content': 0.11863864213228226, 'timestamp': '2025-10-01 04:20:19.043957', 'step': 6101, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:19.074410', 'step': 6101, 'epoch': 1} {'type': 'loss', 'content': 0.11340762674808502, 'timestamp': '2025-10-01 04:20:19.076532', 'step': 6102, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:19.110811', 'step': 6102, 'epoch': 1} {'type': 'loss', 'content': 0.1281067430973053, 'timestamp': '2025-10-01 04:20:19.112742', 'step': 6103, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:20:19.143217', 'step': 6103, 'epoch': 1} {'type': 'loss', 'content': 0.18361538648605347, 'timestamp': '2025-10-01 04:20:19.166669', 'step': 6104, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:20:19.200208', 'step': 6104, 'epoch': 1} {'type': 'loss', 'content': 0.1500672847032547, 'timestamp': '2025-10-01 04:20:19.202044', 'step': 6105, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:20:19.232256', 'step': 6105, 'epoch': 1} {'type': 'loss', 'content': 0.13566836714744568, 'timestamp': '2025-10-01 04:20:19.234388', 'step': 6106, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:20:19.269017', 'step': 6106, 'epoch': 1} {'type': 'loss', 'content': 0.20381300151348114, 'timestamp': '2025-10-01 04:20:19.271388', 'step': 6107, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:20:19.301680', 'step': 6107, 'epoch': 1} {'type': 'loss', 'content': 0.19426773488521576, 'timestamp': '2025-10-01 04:20:19.325125', 'step': 6108, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:20:19.356508', 'step': 6108, 'epoch': 1} {'type': 'loss', 'content': 0.1890079528093338, 'timestamp': '2025-10-01 04:20:19.358382', 'step': 6109, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:19.392336', 'step': 6109, 'epoch': 1} {'type': 'loss', 'content': 0.13014379143714905, 'timestamp': '2025-10-01 04:20:19.394536', 'step': 6110, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:19.427662', 'step': 6110, 'epoch': 1} {'type': 'loss', 'content': 0.15998287498950958, 'timestamp': '2025-10-01 04:20:19.430160', 'step': 6111, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:20:19.462513', 'step': 6111, 'epoch': 1} {'type': 'loss', 'content': 0.16254736483097076, 'timestamp': '2025-10-01 04:20:19.486145', 'step': 6112, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:19.518601', 'step': 6112, 'epoch': 1} {'type': 'loss', 'content': 0.14522700011730194, 'timestamp': '2025-10-01 04:20:19.520618', 'step': 6113, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:19.550428', 'step': 6113, 'epoch': 1} {'type': 'loss', 'content': 0.13227999210357666, 'timestamp': '2025-10-01 04:20:19.552815', 'step': 6114, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:20:19.589143', 'step': 6114, 'epoch': 1} {'type': 'loss', 'content': 0.11200705170631409, 'timestamp': '2025-10-01 04:20:19.591361', 'step': 6115, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:19.623593', 'step': 6115, 'epoch': 1} {'type': 'loss', 'content': 0.1773819476366043, 'timestamp': '2025-10-01 04:20:19.647278', 'step': 6116, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:20:19.680996', 'step': 6116, 'epoch': 1} {'type': 'loss', 'content': 0.15848135948181152, 'timestamp': '2025-10-01 04:20:19.683038', 'step': 6117, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:20:19.716018', 'step': 6117, 'epoch': 1} {'type': 'loss', 'content': 0.12619659304618835, 'timestamp': '2025-10-01 04:20:19.718019', 'step': 6118, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:20:19.754333', 'step': 6118, 'epoch': 1} {'type': 'loss', 'content': 0.16429778933525085, 'timestamp': '2025-10-01 04:20:19.756658', 'step': 6119, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:20:19.786462', 'step': 6119, 'epoch': 1} {'type': 'loss', 'content': 0.1997498720884323, 'timestamp': '2025-10-01 04:20:19.809798', 'step': 6120, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:20:19.840000', 'step': 6120, 'epoch': 1} {'type': 'loss', 'content': 0.09762442857027054, 'timestamp': '2025-10-01 04:20:19.842000', 'step': 6121, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:20:19.872267', 'step': 6121, 'epoch': 1} {'type': 'loss', 'content': 0.1916538029909134, 'timestamp': '2025-10-01 04:20:19.874631', 'step': 6122, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:19.905899', 'step': 6122, 'epoch': 1} {'type': 'loss', 'content': 0.2027902752161026, 'timestamp': '2025-10-01 04:20:19.908001', 'step': 6123, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:19.939916', 'step': 6123, 'epoch': 1} {'type': 'loss', 'content': 0.09299345314502716, 'timestamp': '2025-10-01 04:20:19.963996', 'step': 6124, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:20:20.002056', 'step': 6124, 'epoch': 1} {'type': 'loss', 'content': 0.24295958876609802, 'timestamp': '2025-10-01 04:20:20.004570', 'step': 6125, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:20.041088', 'step': 6125, 'epoch': 1} {'type': 'loss', 'content': 0.18996502459049225, 'timestamp': '2025-10-01 04:20:20.043242', 'step': 6126, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:20:20.084280', 'step': 6126, 'epoch': 1} {'type': 'loss', 'content': 0.09748334437608719, 'timestamp': '2025-10-01 04:20:20.086654', 'step': 6127, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:20:20.123985', 'step': 6127, 'epoch': 1} {'type': 'loss', 'content': 0.204793319106102, 'timestamp': '2025-10-01 04:20:20.150960', 'step': 6128, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:20.194317', 'step': 6128, 'epoch': 1} {'type': 'loss', 'content': 0.11404121667146683, 'timestamp': '2025-10-01 04:20:20.198457', 'step': 6129, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:20.244941', 'step': 6129, 'epoch': 1} {'type': 'loss', 'content': 0.16728255152702332, 'timestamp': '2025-10-01 04:20:20.247316', 'step': 6130, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:20.282948', 'step': 6130, 'epoch': 1} {'type': 'loss', 'content': 0.10727714002132416, 'timestamp': '2025-10-01 04:20:20.289760', 'step': 6131, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:20:20.326553', 'step': 6131, 'epoch': 1} {'type': 'loss', 'content': 0.29202374815940857, 'timestamp': '2025-10-01 04:20:20.350331', 'step': 6132, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:20:20.414466', 'step': 6132, 'epoch': 1} {'type': 'loss', 'content': 0.12480270862579346, 'timestamp': '2025-10-01 04:20:20.416958', 'step': 6133, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:20:20.450828', 'step': 6133, 'epoch': 1} {'type': 'loss', 'content': 0.11144273728132248, 'timestamp': '2025-10-01 04:20:20.453282', 'step': 6134, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:20:20.504788', 'step': 6134, 'epoch': 1} {'type': 'loss', 'content': 0.1857304871082306, 'timestamp': '2025-10-01 04:20:20.506996', 'step': 6135, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:20:20.558165', 'step': 6135, 'epoch': 1} {'type': 'loss', 'content': 0.1239725723862648, 'timestamp': '2025-10-01 04:20:20.581713', 'step': 6136, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:20:20.613988', 'step': 6136, 'epoch': 1} {'type': 'loss', 'content': 0.146450936794281, 'timestamp': '2025-10-01 04:20:20.617343', 'step': 6137, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:20:20.653697', 'step': 6137, 'epoch': 1} {'type': 'loss', 'content': 0.23096971213817596, 'timestamp': '2025-10-01 04:20:20.655893', 'step': 6138, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:20.706047', 'step': 6138, 'epoch': 1} {'type': 'loss', 'content': 0.23500192165374756, 'timestamp': '2025-10-01 04:20:20.708048', 'step': 6139, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:20:20.741138', 'step': 6139, 'epoch': 1} {'type': 'loss', 'content': 0.10292200744152069, 'timestamp': '2025-10-01 04:20:20.764900', 'step': 6140, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:20.812176', 'step': 6140, 'epoch': 1} {'type': 'loss', 'content': 0.15525829792022705, 'timestamp': '2025-10-01 04:20:20.819308', 'step': 6141, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:20.878006', 'step': 6141, 'epoch': 1} {'type': 'loss', 'content': 0.24150152504444122, 'timestamp': '2025-10-01 04:20:20.880405', 'step': 6142, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:20:20.926110', 'step': 6142, 'epoch': 1} {'type': 'loss', 'content': 0.16936391592025757, 'timestamp': '2025-10-01 04:20:20.928365', 'step': 6143, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:20.969255', 'step': 6143, 'epoch': 1} {'type': 'loss', 'content': 0.09456213563680649, 'timestamp': '2025-10-01 04:20:20.995543', 'step': 6144, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:21.045230', 'step': 6144, 'epoch': 1} {'type': 'loss', 'content': 0.22729197144508362, 'timestamp': '2025-10-01 04:20:21.047196', 'step': 6145, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:20:21.080330', 'step': 6145, 'epoch': 1} {'type': 'loss', 'content': 0.1246214210987091, 'timestamp': '2025-10-01 04:20:21.085620', 'step': 6146, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:20:21.123835', 'step': 6146, 'epoch': 1} {'type': 'loss', 'content': 0.17494498193264008, 'timestamp': '2025-10-01 04:20:21.126430', 'step': 6147, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:21.162517', 'step': 6147, 'epoch': 1} {'type': 'loss', 'content': 0.15006433427333832, 'timestamp': '2025-10-01 04:20:21.186095', 'step': 6148, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:20:21.217521', 'step': 6148, 'epoch': 1} {'type': 'loss', 'content': 0.12932531535625458, 'timestamp': '2025-10-01 04:20:21.219583', 'step': 6149, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:20:21.251354', 'step': 6149, 'epoch': 1} {'type': 'loss', 'content': 0.15862329304218292, 'timestamp': '2025-10-01 04:20:21.253427', 'step': 6150, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:21.284682', 'step': 6150, 'epoch': 1} {'type': 'loss', 'content': 0.20857566595077515, 'timestamp': '2025-10-01 04:20:21.286751', 'step': 6151, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:20:21.317787', 'step': 6151, 'epoch': 1} {'type': 'loss', 'content': 0.15454649925231934, 'timestamp': '2025-10-01 04:20:21.342426', 'step': 6152, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:20:21.374556', 'step': 6152, 'epoch': 1} {'type': 'loss', 'content': 0.25594061613082886, 'timestamp': '2025-10-01 04:20:21.376802', 'step': 6153, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:20:21.407993', 'step': 6153, 'epoch': 1} {'type': 'loss', 'content': 0.1407773345708847, 'timestamp': '2025-10-01 04:20:21.410094', 'step': 6154, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:20:21.441087', 'step': 6154, 'epoch': 1} {'type': 'loss', 'content': 0.1836850941181183, 'timestamp': '2025-10-01 04:20:21.443443', 'step': 6155, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:21.475111', 'step': 6155, 'epoch': 1} {'type': 'loss', 'content': 0.21088136732578278, 'timestamp': '2025-10-01 04:20:21.498499', 'step': 6156, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:20:21.529294', 'step': 6156, 'epoch': 1} {'type': 'loss', 'content': 0.15965864062309265, 'timestamp': '2025-10-01 04:20:21.531270', 'step': 6157, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:20:21.561597', 'step': 6157, 'epoch': 1} {'type': 'loss', 'content': 0.29642733931541443, 'timestamp': '2025-10-01 04:20:21.563740', 'step': 6158, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:20:21.597564', 'step': 6158, 'epoch': 1} {'type': 'loss', 'content': 0.17813743650913239, 'timestamp': '2025-10-01 04:20:21.599558', 'step': 6159, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:21.630518', 'step': 6159, 'epoch': 1} {'type': 'loss', 'content': 0.10938233137130737, 'timestamp': '2025-10-01 04:20:21.654073', 'step': 6160, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:21.685648', 'step': 6160, 'epoch': 1} {'type': 'loss', 'content': 0.09221868962049484, 'timestamp': '2025-10-01 04:20:21.687878', 'step': 6161, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:21.719171', 'step': 6161, 'epoch': 1} {'type': 'loss', 'content': 0.1524454802274704, 'timestamp': '2025-10-01 04:20:21.721265', 'step': 6162, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:20:21.753409', 'step': 6162, 'epoch': 1} {'type': 'loss', 'content': 0.15770511329174042, 'timestamp': '2025-10-01 04:20:21.755380', 'step': 6163, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:21.786913', 'step': 6163, 'epoch': 1} {'type': 'loss', 'content': 0.16691981256008148, 'timestamp': '2025-10-01 04:20:21.810390', 'step': 6164, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:21.840687', 'step': 6164, 'epoch': 1} {'type': 'loss', 'content': 0.16525541245937347, 'timestamp': '2025-10-01 04:20:21.842756', 'step': 6165, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:20:21.873670', 'step': 6165, 'epoch': 1} {'type': 'loss', 'content': 0.07767041027545929, 'timestamp': '2025-10-01 04:20:21.875645', 'step': 6166, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:20:21.905984', 'step': 6166, 'epoch': 1} {'type': 'loss', 'content': 0.15285886824131012, 'timestamp': '2025-10-01 04:20:21.907986', 'step': 6167, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:20:21.938411', 'step': 6167, 'epoch': 1} {'type': 'loss', 'content': 0.12411338835954666, 'timestamp': '2025-10-01 04:20:21.962112', 'step': 6168, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:21.996108', 'step': 6168, 'epoch': 1} {'type': 'loss', 'content': 0.17474861443042755, 'timestamp': '2025-10-01 04:20:21.998194', 'step': 6169, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:22.030124', 'step': 6169, 'epoch': 1} {'type': 'loss', 'content': 0.11302489042282104, 'timestamp': '2025-10-01 04:20:22.032088', 'step': 6170, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:20:22.062129', 'step': 6170, 'epoch': 1} {'type': 'loss', 'content': 0.15546520054340363, 'timestamp': '2025-10-01 04:20:22.064154', 'step': 6171, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:22.097414', 'step': 6171, 'epoch': 1} {'type': 'loss', 'content': 0.12129484862089157, 'timestamp': '2025-10-01 04:20:22.122824', 'step': 6172, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:22.155828', 'step': 6172, 'epoch': 1} {'type': 'loss', 'content': 0.10343523323535919, 'timestamp': '2025-10-01 04:20:22.157860', 'step': 6173, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:20:22.192191', 'step': 6173, 'epoch': 1} {'type': 'loss', 'content': 0.21684186160564423, 'timestamp': '2025-10-01 04:20:22.194891', 'step': 6174, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:20:22.226623', 'step': 6174, 'epoch': 1} {'type': 'loss', 'content': 0.15994656085968018, 'timestamp': '2025-10-01 04:20:22.233360', 'step': 6175, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:22.264374', 'step': 6175, 'epoch': 1} {'type': 'loss', 'content': 0.13151319324970245, 'timestamp': '2025-10-01 04:20:22.287894', 'step': 6176, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:20:22.318990', 'step': 6176, 'epoch': 1} {'type': 'loss', 'content': 0.1337975114583969, 'timestamp': '2025-10-01 04:20:22.320973', 'step': 6177, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:22.353404', 'step': 6177, 'epoch': 1} {'type': 'loss', 'content': 0.2124391347169876, 'timestamp': '2025-10-01 04:20:22.355480', 'step': 6178, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:20:22.386619', 'step': 6178, 'epoch': 1} {'type': 'loss', 'content': 0.11875090003013611, 'timestamp': '2025-10-01 04:20:22.394206', 'step': 6179, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:20:22.425386', 'step': 6179, 'epoch': 1} {'type': 'loss', 'content': 0.21683156490325928, 'timestamp': '2025-10-01 04:20:22.448983', 'step': 6180, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:20:22.486627', 'step': 6180, 'epoch': 1} {'type': 'loss', 'content': 0.12209875136613846, 'timestamp': '2025-10-01 04:20:22.489643', 'step': 6181, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:20:22.522758', 'step': 6181, 'epoch': 1} {'type': 'loss', 'content': 0.12477771192789078, 'timestamp': '2025-10-01 04:20:22.524953', 'step': 6182, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:22.557337', 'step': 6182, 'epoch': 1} {'type': 'loss', 'content': 0.15593744814395905, 'timestamp': '2025-10-01 04:20:22.559760', 'step': 6183, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:22.594976', 'step': 6183, 'epoch': 1} {'type': 'loss', 'content': 0.17094099521636963, 'timestamp': '2025-10-01 04:20:22.619207', 'step': 6184, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:20:22.649303', 'step': 6184, 'epoch': 1} {'type': 'loss', 'content': 0.16505490243434906, 'timestamp': '2025-10-01 04:20:22.651360', 'step': 6185, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:20:22.683010', 'step': 6185, 'epoch': 1} {'type': 'loss', 'content': 0.10777422040700912, 'timestamp': '2025-10-01 04:20:22.685873', 'step': 6186, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:22.718725', 'step': 6186, 'epoch': 1} {'type': 'loss', 'content': 0.2022017240524292, 'timestamp': '2025-10-01 04:20:22.720746', 'step': 6187, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:22.752073', 'step': 6187, 'epoch': 1} {'type': 'loss', 'content': 0.11092737317085266, 'timestamp': '2025-10-01 04:20:22.775739', 'step': 6188, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:20:22.811619', 'step': 6188, 'epoch': 1} {'type': 'loss', 'content': 0.06664464622735977, 'timestamp': '2025-10-01 04:20:22.813782', 'step': 6189, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:20:22.850687', 'step': 6189, 'epoch': 1} {'type': 'loss', 'content': 0.21206821501255035, 'timestamp': '2025-10-01 04:20:22.854600', 'step': 6190, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:20:22.887844', 'step': 6190, 'epoch': 1} {'type': 'loss', 'content': 0.09292648732662201, 'timestamp': '2025-10-01 04:20:22.889821', 'step': 6191, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:20:22.921213', 'step': 6191, 'epoch': 1} {'type': 'loss', 'content': 0.2240086942911148, 'timestamp': '2025-10-01 04:20:22.945027', 'step': 6192, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:20:22.976522', 'step': 6192, 'epoch': 1} {'type': 'loss', 'content': 0.21950924396514893, 'timestamp': '2025-10-01 04:20:22.978643', 'step': 6193, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:23.009610', 'step': 6193, 'epoch': 1} {'type': 'loss', 'content': 0.19583599269390106, 'timestamp': '2025-10-01 04:20:23.019190', 'step': 6194, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:20:23.051006', 'step': 6194, 'epoch': 1} {'type': 'loss', 'content': 0.16135551035404205, 'timestamp': '2025-10-01 04:20:23.053036', 'step': 6195, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:23.085676', 'step': 6195, 'epoch': 1} {'type': 'loss', 'content': 0.09249497205018997, 'timestamp': '2025-10-01 04:20:23.109332', 'step': 6196, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:23.140136', 'step': 6196, 'epoch': 1} {'type': 'loss', 'content': 0.1764797866344452, 'timestamp': '2025-10-01 04:20:23.145796', 'step': 6197, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:20:23.176594', 'step': 6197, 'epoch': 1} {'type': 'loss', 'content': 0.09477169811725616, 'timestamp': '2025-10-01 04:20:23.182582', 'step': 6198, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:23.214688', 'step': 6198, 'epoch': 1} {'type': 'loss', 'content': 0.10223239660263062, 'timestamp': '2025-10-01 04:20:23.216703', 'step': 6199, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:20:23.249103', 'step': 6199, 'epoch': 1} {'type': 'loss', 'content': 0.10678133368492126, 'timestamp': '2025-10-01 04:20:23.272525', 'step': 6200, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:23.302847', 'step': 6200, 'epoch': 1} {'type': 'loss', 'content': 0.2174510955810547, 'timestamp': '2025-10-01 04:20:23.304919', 'step': 6201, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:20:23.335224', 'step': 6201, 'epoch': 1} {'type': 'loss', 'content': 0.13678160309791565, 'timestamp': '2025-10-01 04:20:23.337091', 'step': 6202, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:23.367895', 'step': 6202, 'epoch': 1} {'type': 'loss', 'content': 0.15294711291790009, 'timestamp': '2025-10-01 04:20:23.369748', 'step': 6203, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:20:23.399813', 'step': 6203, 'epoch': 1} {'type': 'loss', 'content': 0.16743989288806915, 'timestamp': '2025-10-01 04:20:23.423451', 'step': 6204, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:20:23.454409', 'step': 6204, 'epoch': 1} {'type': 'loss', 'content': 0.1309054046869278, 'timestamp': '2025-10-01 04:20:23.456644', 'step': 6205, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:20:23.489078', 'step': 6205, 'epoch': 1} {'type': 'loss', 'content': 0.2608473300933838, 'timestamp': '2025-10-01 04:20:23.491030', 'step': 6206, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:23.527620', 'step': 6206, 'epoch': 1} {'type': 'loss', 'content': 0.12897580862045288, 'timestamp': '2025-10-01 04:20:23.529803', 'step': 6207, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:20:23.560015', 'step': 6207, 'epoch': 1} {'type': 'loss', 'content': 0.1219325140118599, 'timestamp': '2025-10-01 04:20:23.584057', 'step': 6208, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:20:23.616205', 'step': 6208, 'epoch': 1} {'type': 'loss', 'content': 0.19409991800785065, 'timestamp': '2025-10-01 04:20:23.618160', 'step': 6209, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:20:23.650885', 'step': 6209, 'epoch': 1} {'type': 'loss', 'content': 0.18013504147529602, 'timestamp': '2025-10-01 04:20:23.653143', 'step': 6210, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:23.685411', 'step': 6210, 'epoch': 1} {'type': 'loss', 'content': 0.18702228367328644, 'timestamp': '2025-10-01 04:20:23.687887', 'step': 6211, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:20:23.721423', 'step': 6211, 'epoch': 1} {'type': 'loss', 'content': 0.16399002075195312, 'timestamp': '2025-10-01 04:20:23.745043', 'step': 6212, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:23.775843', 'step': 6212, 'epoch': 1} {'type': 'loss', 'content': 0.16812540590763092, 'timestamp': '2025-10-01 04:20:23.778026', 'step': 6213, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:20:23.809808', 'step': 6213, 'epoch': 1} {'type': 'loss', 'content': 0.13436906039714813, 'timestamp': '2025-10-01 04:20:23.812281', 'step': 6214, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:23.844449', 'step': 6214, 'epoch': 1} {'type': 'loss', 'content': 0.09310286492109299, 'timestamp': '2025-10-01 04:20:23.846458', 'step': 6215, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:20:23.877181', 'step': 6215, 'epoch': 1} {'type': 'loss', 'content': 0.15067662298679352, 'timestamp': '2025-10-01 04:20:23.900751', 'step': 6216, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:23.931153', 'step': 6216, 'epoch': 1} {'type': 'loss', 'content': 0.17084166407585144, 'timestamp': '2025-10-01 04:20:23.933186', 'step': 6217, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:20:23.964469', 'step': 6217, 'epoch': 1} {'type': 'loss', 'content': 0.26788565516471863, 'timestamp': '2025-10-01 04:20:23.966324', 'step': 6218, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:20:23.998350', 'step': 6218, 'epoch': 1} {'type': 'loss', 'content': 0.33359310030937195, 'timestamp': '2025-10-01 04:20:24.000981', 'step': 6219, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:20:24.033158', 'step': 6219, 'epoch': 1} {'type': 'loss', 'content': 0.12542466819286346, 'timestamp': '2025-10-01 04:20:24.061608', 'step': 6220, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:24.092807', 'step': 6220, 'epoch': 1} {'type': 'loss', 'content': 0.1450374722480774, 'timestamp': '2025-10-01 04:20:24.094649', 'step': 6221, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:20:24.125618', 'step': 6221, 'epoch': 1} {'type': 'loss', 'content': 0.14270009100437164, 'timestamp': '2025-10-01 04:20:24.127634', 'step': 6222, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:24.161306', 'step': 6222, 'epoch': 1} {'type': 'loss', 'content': 0.10102511197328568, 'timestamp': '2025-10-01 04:20:24.163531', 'step': 6223, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:24.208566', 'step': 6223, 'epoch': 1} {'type': 'loss', 'content': 0.10689377039670944, 'timestamp': '2025-10-01 04:20:24.232190', 'step': 6224, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:20:24.262406', 'step': 6224, 'epoch': 1} {'type': 'loss', 'content': 0.09651314467191696, 'timestamp': '2025-10-01 04:20:24.264422', 'step': 6225, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:20:24.298808', 'step': 6225, 'epoch': 1} {'type': 'loss', 'content': 0.15010428428649902, 'timestamp': '2025-10-01 04:20:24.301746', 'step': 6226, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:24.332384', 'step': 6226, 'epoch': 1} {'type': 'loss', 'content': 0.21529768407344818, 'timestamp': '2025-10-01 04:20:24.334851', 'step': 6227, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:20:24.365147', 'step': 6227, 'epoch': 1} {'type': 'loss', 'content': 0.26971882581710815, 'timestamp': '2025-10-01 04:20:24.388637', 'step': 6228, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:24.418688', 'step': 6228, 'epoch': 1} {'type': 'loss', 'content': 0.1203477680683136, 'timestamp': '2025-10-01 04:20:24.420933', 'step': 6229, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:24.453717', 'step': 6229, 'epoch': 1} {'type': 'loss', 'content': 0.13729193806648254, 'timestamp': '2025-10-01 04:20:24.455918', 'step': 6230, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:20:24.485980', 'step': 6230, 'epoch': 1} {'type': 'loss', 'content': 0.12085074931383133, 'timestamp': '2025-10-01 04:20:24.488419', 'step': 6231, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:20:24.520597', 'step': 6231, 'epoch': 1} {'type': 'loss', 'content': 0.257594496011734, 'timestamp': '2025-10-01 04:20:24.544831', 'step': 6232, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:24.575387', 'step': 6232, 'epoch': 1} {'type': 'loss', 'content': 0.10280770808458328, 'timestamp': '2025-10-01 04:20:24.579708', 'step': 6233, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:24.611204', 'step': 6233, 'epoch': 1} {'type': 'loss', 'content': 0.15147823095321655, 'timestamp': '2025-10-01 04:20:24.613851', 'step': 6234, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:20:24.645444', 'step': 6234, 'epoch': 1} {'type': 'loss', 'content': 0.17269869148731232, 'timestamp': '2025-10-01 04:20:24.648955', 'step': 6235, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:24.680860', 'step': 6235, 'epoch': 1} {'type': 'loss', 'content': 0.14296485483646393, 'timestamp': '2025-10-01 04:20:24.704327', 'step': 6236, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:20:24.744407', 'step': 6236, 'epoch': 1} {'type': 'loss', 'content': 0.13911926746368408, 'timestamp': '2025-10-01 04:20:24.746431', 'step': 6237, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:20:24.777991', 'step': 6237, 'epoch': 1} {'type': 'loss', 'content': 0.09550657868385315, 'timestamp': '2025-10-01 04:20:24.780767', 'step': 6238, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:24.813160', 'step': 6238, 'epoch': 1} {'type': 'loss', 'content': 0.24932239949703217, 'timestamp': '2025-10-01 04:20:24.815050', 'step': 6239, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:20:24.846686', 'step': 6239, 'epoch': 1} {'type': 'loss', 'content': 0.18390801548957825, 'timestamp': '2025-10-01 04:20:24.870443', 'step': 6240, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:20:24.907476', 'step': 6240, 'epoch': 1} {'type': 'loss', 'content': 0.16337016224861145, 'timestamp': '2025-10-01 04:20:24.914870', 'step': 6241, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:20:24.947325', 'step': 6241, 'epoch': 1} {'type': 'loss', 'content': 0.2427433729171753, 'timestamp': '2025-10-01 04:20:24.951085', 'step': 6242, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:20:24.986856', 'step': 6242, 'epoch': 1} {'type': 'loss', 'content': 0.1262786090373993, 'timestamp': '2025-10-01 04:20:24.989336', 'step': 6243, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:20:25.033857', 'step': 6243, 'epoch': 1} {'type': 'loss', 'content': 0.18141140043735504, 'timestamp': '2025-10-01 04:20:25.057548', 'step': 6244, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:20:25.093008', 'step': 6244, 'epoch': 1} {'type': 'loss', 'content': 0.05696460232138634, 'timestamp': '2025-10-01 04:20:25.094954', 'step': 6245, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:20:25.125524', 'step': 6245, 'epoch': 1} {'type': 'loss', 'content': 0.10923008620738983, 'timestamp': '2025-10-01 04:20:25.127900', 'step': 6246, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:20:25.158732', 'step': 6246, 'epoch': 1} {'type': 'loss', 'content': 0.2237270325422287, 'timestamp': '2025-10-01 04:20:25.163627', 'step': 6247, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:25.203607', 'step': 6247, 'epoch': 1} {'type': 'loss', 'content': 0.12364114075899124, 'timestamp': '2025-10-01 04:20:25.227266', 'step': 6248, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:20:25.260022', 'step': 6248, 'epoch': 1} {'type': 'loss', 'content': 0.14637495577335358, 'timestamp': '2025-10-01 04:20:25.262368', 'step': 6249, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:25.295343', 'step': 6249, 'epoch': 1} {'type': 'loss', 'content': 0.18087045848369598, 'timestamp': '2025-10-01 04:20:25.297467', 'step': 6250, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:25.332570', 'step': 6250, 'epoch': 1} {'type': 'loss', 'content': 0.1300496608018875, 'timestamp': '2025-10-01 04:20:25.334656', 'step': 6251, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:20:25.366648', 'step': 6251, 'epoch': 1} {'type': 'loss', 'content': 0.11542269587516785, 'timestamp': '2025-10-01 04:20:25.390701', 'step': 6252, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:20:25.423014', 'step': 6252, 'epoch': 1} {'type': 'loss', 'content': 0.0901566818356514, 'timestamp': '2025-10-01 04:20:25.425168', 'step': 6253, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:25.456287', 'step': 6253, 'epoch': 1} {'type': 'loss', 'content': 0.21485204994678497, 'timestamp': '2025-10-01 04:20:25.458308', 'step': 6254, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:20:25.495640', 'step': 6254, 'epoch': 1} {'type': 'loss', 'content': 0.30872732400894165, 'timestamp': '2025-10-01 04:20:25.497859', 'step': 6255, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:20:25.535235', 'step': 6255, 'epoch': 1} {'type': 'loss', 'content': 0.1221752017736435, 'timestamp': '2025-10-01 04:20:25.560448', 'step': 6256, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:20:25.597960', 'step': 6256, 'epoch': 1} {'type': 'loss', 'content': 0.12105420231819153, 'timestamp': '2025-10-01 04:20:25.600379', 'step': 6257, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:20:25.633215', 'step': 6257, 'epoch': 1} {'type': 'loss', 'content': 0.13478165864944458, 'timestamp': '2025-10-01 04:20:25.635341', 'step': 6258, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:20:25.667874', 'step': 6258, 'epoch': 1} {'type': 'loss', 'content': 0.19905392825603485, 'timestamp': '2025-10-01 04:20:25.672071', 'step': 6259, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:25.706139', 'step': 6259, 'epoch': 1} {'type': 'loss', 'content': 0.16956844925880432, 'timestamp': '2025-10-01 04:20:25.729603', 'step': 6260, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:25.763762', 'step': 6260, 'epoch': 1} {'type': 'loss', 'content': 0.07686630636453629, 'timestamp': '2025-10-01 04:20:25.765924', 'step': 6261, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:25.799847', 'step': 6261, 'epoch': 1} {'type': 'loss', 'content': 0.10227138549089432, 'timestamp': '2025-10-01 04:20:25.815198', 'step': 6262, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:20:25.845872', 'step': 6262, 'epoch': 1} {'type': 'loss', 'content': 0.16124297678470612, 'timestamp': '2025-10-01 04:20:25.847890', 'step': 6263, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:20:25.880285', 'step': 6263, 'epoch': 1} {'type': 'loss', 'content': 0.1305532604455948, 'timestamp': '2025-10-01 04:20:25.904318', 'step': 6264, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:25.936876', 'step': 6264, 'epoch': 1} {'type': 'loss', 'content': 0.12863853573799133, 'timestamp': '2025-10-01 04:20:25.938987', 'step': 6265, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:20:25.969098', 'step': 6265, 'epoch': 1} {'type': 'loss', 'content': 0.20505723357200623, 'timestamp': '2025-10-01 04:20:25.972216', 'step': 6266, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:26.010200', 'step': 6266, 'epoch': 1} {'type': 'loss', 'content': 0.14319905638694763, 'timestamp': '2025-10-01 04:20:26.012097', 'step': 6267, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:26.048156', 'step': 6267, 'epoch': 1} {'type': 'loss', 'content': 0.1505107879638672, 'timestamp': '2025-10-01 04:20:26.071743', 'step': 6268, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:26.114008', 'step': 6268, 'epoch': 1} {'type': 'loss', 'content': 0.17583434283733368, 'timestamp': '2025-10-01 04:20:26.117164', 'step': 6269, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:20:26.150683', 'step': 6269, 'epoch': 1} {'type': 'loss', 'content': 0.1602231115102768, 'timestamp': '2025-10-01 04:20:26.153875', 'step': 6270, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:20:26.186363', 'step': 6270, 'epoch': 1} {'type': 'loss', 'content': 0.19805802404880524, 'timestamp': '2025-10-01 04:20:26.188484', 'step': 6271, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:26.219160', 'step': 6271, 'epoch': 1} {'type': 'loss', 'content': 0.14680053293704987, 'timestamp': '2025-10-01 04:20:26.244769', 'step': 6272, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:20:26.279522', 'step': 6272, 'epoch': 1} {'type': 'loss', 'content': 0.11170484125614166, 'timestamp': '2025-10-01 04:20:26.281364', 'step': 6273, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:20:26.313148', 'step': 6273, 'epoch': 1} {'type': 'loss', 'content': 0.16201767325401306, 'timestamp': '2025-10-01 04:20:26.316054', 'step': 6274, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:20:26.347371', 'step': 6274, 'epoch': 1} {'type': 'loss', 'content': 0.12353990972042084, 'timestamp': '2025-10-01 04:20:26.349753', 'step': 6275, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:26.382026', 'step': 6275, 'epoch': 1} {'type': 'loss', 'content': 0.22392147779464722, 'timestamp': '2025-10-01 04:20:26.405915', 'step': 6276, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:20:26.437855', 'step': 6276, 'epoch': 1} {'type': 'loss', 'content': 0.14543530344963074, 'timestamp': '2025-10-01 04:20:26.442745', 'step': 6277, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:26.480560', 'step': 6277, 'epoch': 1} {'type': 'loss', 'content': 0.20736278593540192, 'timestamp': '2025-10-01 04:20:26.482634', 'step': 6278, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:20:26.514067', 'step': 6278, 'epoch': 1} {'type': 'loss', 'content': 0.20053929090499878, 'timestamp': '2025-10-01 04:20:26.516166', 'step': 6279, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:20:26.553892', 'step': 6279, 'epoch': 1} {'type': 'loss', 'content': 0.12926828861236572, 'timestamp': '2025-10-01 04:20:26.577980', 'step': 6280, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:20:26.614625', 'step': 6280, 'epoch': 1} {'type': 'loss', 'content': 0.15900598466396332, 'timestamp': '2025-10-01 04:20:26.616515', 'step': 6281, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:26.649858', 'step': 6281, 'epoch': 1} {'type': 'loss', 'content': 0.18168705701828003, 'timestamp': '2025-10-01 04:20:26.652409', 'step': 6282, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:20:26.686176', 'step': 6282, 'epoch': 1} {'type': 'loss', 'content': 0.32720887660980225, 'timestamp': '2025-10-01 04:20:26.688234', 'step': 6283, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:20:26.719601', 'step': 6283, 'epoch': 1} {'type': 'loss', 'content': 0.14256437122821808, 'timestamp': '2025-10-01 04:20:26.743572', 'step': 6284, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:20:26.783346', 'step': 6284, 'epoch': 1} {'type': 'loss', 'content': 0.1598295122385025, 'timestamp': '2025-10-01 04:20:26.785440', 'step': 6285, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:20:26.817698', 'step': 6285, 'epoch': 1} {'type': 'loss', 'content': 0.1721116006374359, 'timestamp': '2025-10-01 04:20:26.821454', 'step': 6286, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:26.855695', 'step': 6286, 'epoch': 1} {'type': 'loss', 'content': 0.1704539805650711, 'timestamp': '2025-10-01 04:20:26.860257', 'step': 6287, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:20:26.893998', 'step': 6287, 'epoch': 1} {'type': 'loss', 'content': 0.22961409389972687, 'timestamp': '2025-10-01 04:20:26.917589', 'step': 6288, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:26.949689', 'step': 6288, 'epoch': 1} {'type': 'loss', 'content': 0.18258777260780334, 'timestamp': '2025-10-01 04:20:26.953571', 'step': 6289, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:26.986254', 'step': 6289, 'epoch': 1} {'type': 'loss', 'content': 0.1119731068611145, 'timestamp': '2025-10-01 04:20:26.988196', 'step': 6290, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:20:27.029894', 'step': 6290, 'epoch': 1} {'type': 'loss', 'content': 0.2471492886543274, 'timestamp': '2025-10-01 04:20:27.032356', 'step': 6291, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:27.065698', 'step': 6291, 'epoch': 1} {'type': 'loss', 'content': 0.24711406230926514, 'timestamp': '2025-10-01 04:20:27.089167', 'step': 6292, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:20:27.134290', 'step': 6292, 'epoch': 1} {'type': 'loss', 'content': 0.09499114006757736, 'timestamp': '2025-10-01 04:20:27.139121', 'step': 6293, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:27.177807', 'step': 6293, 'epoch': 1} {'type': 'loss', 'content': 0.31984058022499084, 'timestamp': '2025-10-01 04:20:27.179755', 'step': 6294, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:20:27.210768', 'step': 6294, 'epoch': 1} {'type': 'loss', 'content': 0.2011307179927826, 'timestamp': '2025-10-01 04:20:27.212978', 'step': 6295, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:20:27.243668', 'step': 6295, 'epoch': 1} {'type': 'loss', 'content': 0.1003921627998352, 'timestamp': '2025-10-01 04:20:27.267056', 'step': 6296, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:20:27.299325', 'step': 6296, 'epoch': 1} {'type': 'loss', 'content': 0.127422034740448, 'timestamp': '2025-10-01 04:20:27.301671', 'step': 6297, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:20:27.333093', 'step': 6297, 'epoch': 1} {'type': 'loss', 'content': 0.18052923679351807, 'timestamp': '2025-10-01 04:20:27.337759', 'step': 6298, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:20:27.370108', 'step': 6298, 'epoch': 1} {'type': 'loss', 'content': 0.14184124767780304, 'timestamp': '2025-10-01 04:20:27.372456', 'step': 6299, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:20:27.405660', 'step': 6299, 'epoch': 1} {'type': 'loss', 'content': 0.19568122923374176, 'timestamp': '2025-10-01 04:20:27.429173', 'step': 6300, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:20:27.461001', 'step': 6300, 'epoch': 1} {'type': 'loss', 'content': 0.20149502158164978, 'timestamp': '2025-10-01 04:20:27.463780', 'step': 6301, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:20:27.494248', 'step': 6301, 'epoch': 1} {'type': 'loss', 'content': 0.11938557773828506, 'timestamp': '2025-10-01 04:20:27.496578', 'step': 6302, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:20:27.528080', 'step': 6302, 'epoch': 1} {'type': 'loss', 'content': 0.11110545694828033, 'timestamp': '2025-10-01 04:20:27.530306', 'step': 6303, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:20:27.559983', 'step': 6303, 'epoch': 1} {'type': 'loss', 'content': 0.14638295769691467, 'timestamp': '2025-10-01 04:20:27.583658', 'step': 6304, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:20:27.614362', 'step': 6304, 'epoch': 1} {'type': 'loss', 'content': 0.11844345927238464, 'timestamp': '2025-10-01 04:20:27.618041', 'step': 6305, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:20:27.650392', 'step': 6305, 'epoch': 1} {'type': 'loss', 'content': 0.15608246624469757, 'timestamp': '2025-10-01 04:20:27.652756', 'step': 6306, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:20:27.686387', 'step': 6306, 'epoch': 1} {'type': 'loss', 'content': 0.1859234720468521, 'timestamp': '2025-10-01 04:20:27.688881', 'step': 6307, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-10-01 04:20:27.719361', 'step': 6307, 'epoch': 1} {'type': 'loss', 'content': 0.07707175612449646, 'timestamp': '2025-10-01 04:20:27.744942', 'step': 6308, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:20:27.775327', 'step': 6308, 'epoch': 1} {'type': 'loss', 'content': 0.12302794307470322, 'timestamp': '2025-10-01 04:20:27.777754', 'step': 6309, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:20:27.809627', 'step': 6309, 'epoch': 1} {'type': 'loss', 'content': 0.13449327647686005, 'timestamp': '2025-10-01 04:20:27.813247', 'step': 6310, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:27.845567', 'step': 6310, 'epoch': 1} {'type': 'loss', 'content': 0.13135923445224762, 'timestamp': '2025-10-01 04:20:27.847950', 'step': 6311, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:27.879880', 'step': 6311, 'epoch': 1} {'type': 'loss', 'content': 0.0765550509095192, 'timestamp': '2025-10-01 04:20:27.903805', 'step': 6312, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:20:27.935392', 'step': 6312, 'epoch': 1} {'type': 'loss', 'content': 0.14603063464164734, 'timestamp': '2025-10-01 04:20:27.937989', 'step': 6313, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:20:27.972632', 'step': 6313, 'epoch': 1} {'type': 'loss', 'content': 0.12479306757450104, 'timestamp': '2025-10-01 04:20:27.974995', 'step': 6314, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:28.012511', 'step': 6314, 'epoch': 1} {'type': 'loss', 'content': 0.21319781243801117, 'timestamp': '2025-10-01 04:20:28.015031', 'step': 6315, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:20:28.050556', 'step': 6315, 'epoch': 1} {'type': 'loss', 'content': 0.19358617067337036, 'timestamp': '2025-10-01 04:20:28.074481', 'step': 6316, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:28.112872', 'step': 6316, 'epoch': 1} {'type': 'loss', 'content': 0.21149946749210358, 'timestamp': '2025-10-01 04:20:28.115165', 'step': 6317, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:20:28.147852', 'step': 6317, 'epoch': 1} {'type': 'loss', 'content': 0.2801354229450226, 'timestamp': '2025-10-01 04:20:28.149910', 'step': 6318, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:28.184319', 'step': 6318, 'epoch': 1} {'type': 'loss', 'content': 0.26201295852661133, 'timestamp': '2025-10-01 04:20:28.186908', 'step': 6319, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:20:28.218142', 'step': 6319, 'epoch': 1} {'type': 'loss', 'content': 0.12889635562896729, 'timestamp': '2025-10-01 04:20:28.242612', 'step': 6320, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:28.273643', 'step': 6320, 'epoch': 1} {'type': 'loss', 'content': 0.1569720059633255, 'timestamp': '2025-10-01 04:20:28.275807', 'step': 6321, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:20:28.307624', 'step': 6321, 'epoch': 1} {'type': 'loss', 'content': 0.23553884029388428, 'timestamp': '2025-10-01 04:20:28.309803', 'step': 6322, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:20:28.343626', 'step': 6322, 'epoch': 1} {'type': 'loss', 'content': 0.1243080273270607, 'timestamp': '2025-10-01 04:20:28.345906', 'step': 6323, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:28.378939', 'step': 6323, 'epoch': 1} {'type': 'loss', 'content': 0.10613115131855011, 'timestamp': '2025-10-01 04:20:28.403064', 'step': 6324, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:20:28.435346', 'step': 6324, 'epoch': 1} {'type': 'loss', 'content': 0.16904956102371216, 'timestamp': '2025-10-01 04:20:28.437767', 'step': 6325, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:20:28.470529', 'step': 6325, 'epoch': 1} {'type': 'loss', 'content': 0.1328411102294922, 'timestamp': '2025-10-01 04:20:28.473109', 'step': 6326, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:20:28.504360', 'step': 6326, 'epoch': 1} {'type': 'loss', 'content': 0.1102084219455719, 'timestamp': '2025-10-01 04:20:28.507114', 'step': 6327, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:28.541107', 'step': 6327, 'epoch': 1} {'type': 'loss', 'content': 0.13233277201652527, 'timestamp': '2025-10-01 04:20:28.565260', 'step': 6328, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:20:28.608277', 'step': 6328, 'epoch': 1} {'type': 'loss', 'content': 0.13057444989681244, 'timestamp': '2025-10-01 04:20:28.610499', 'step': 6329, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:28.648003', 'step': 6329, 'epoch': 1} {'type': 'loss', 'content': 0.13341282308101654, 'timestamp': '2025-10-01 04:20:28.650646', 'step': 6330, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:28.685404', 'step': 6330, 'epoch': 1} {'type': 'loss', 'content': 0.1595824509859085, 'timestamp': '2025-10-01 04:20:28.687651', 'step': 6331, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:28.721854', 'step': 6331, 'epoch': 1} {'type': 'loss', 'content': 0.13513267040252686, 'timestamp': '2025-10-01 04:20:28.745327', 'step': 6332, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:20:28.778007', 'step': 6332, 'epoch': 1} {'type': 'loss', 'content': 0.10151084512472153, 'timestamp': '2025-10-01 04:20:28.780638', 'step': 6333, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:20:28.826677', 'step': 6333, 'epoch': 1} {'type': 'loss', 'content': 0.1425285041332245, 'timestamp': '2025-10-01 04:20:28.828817', 'step': 6334, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:28.859910', 'step': 6334, 'epoch': 1} {'type': 'loss', 'content': 0.12395431101322174, 'timestamp': '2025-10-01 04:20:28.864244', 'step': 6335, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:28.895691', 'step': 6335, 'epoch': 1} {'type': 'loss', 'content': 0.19165349006652832, 'timestamp': '2025-10-01 04:20:28.919236', 'step': 6336, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:20:28.957464', 'step': 6336, 'epoch': 1} {'type': 'loss', 'content': 0.1165761724114418, 'timestamp': '2025-10-01 04:20:28.959900', 'step': 6337, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:20:28.991366', 'step': 6337, 'epoch': 1} {'type': 'loss', 'content': 0.1661965399980545, 'timestamp': '2025-10-01 04:20:28.993718', 'step': 6338, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:20:29.024937', 'step': 6338, 'epoch': 1} {'type': 'loss', 'content': 0.1795700490474701, 'timestamp': '2025-10-01 04:20:29.027379', 'step': 6339, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:29.063568', 'step': 6339, 'epoch': 1} {'type': 'loss', 'content': 0.12532630562782288, 'timestamp': '2025-10-01 04:20:29.087046', 'step': 6340, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:29.121846', 'step': 6340, 'epoch': 1} {'type': 'loss', 'content': 0.16097073256969452, 'timestamp': '2025-10-01 04:20:29.123580', 'step': 6341, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:20:29.162907', 'step': 6341, 'epoch': 1} {'type': 'loss', 'content': 0.2258162647485733, 'timestamp': '2025-10-01 04:20:29.165018', 'step': 6342, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:20:29.208465', 'step': 6342, 'epoch': 1} {'type': 'loss', 'content': 0.21940568089485168, 'timestamp': '2025-10-01 04:20:29.210499', 'step': 6343, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:20:29.250883', 'step': 6343, 'epoch': 1} {'type': 'loss', 'content': 0.1340627819299698, 'timestamp': '2025-10-01 04:20:29.274229', 'step': 6344, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:20:29.315781', 'step': 6344, 'epoch': 1} {'type': 'loss', 'content': 0.20272158086299896, 'timestamp': '2025-10-01 04:20:29.317750', 'step': 6345, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:20:29.348025', 'step': 6345, 'epoch': 1} {'type': 'loss', 'content': 0.12028607726097107, 'timestamp': '2025-10-01 04:20:29.349943', 'step': 6346, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:20:29.383025', 'step': 6346, 'epoch': 1} {'type': 'loss', 'content': 0.11153077334165573, 'timestamp': '2025-10-01 04:20:29.387862', 'step': 6347, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:20:29.424910', 'step': 6347, 'epoch': 1} {'type': 'loss', 'content': 0.12679728865623474, 'timestamp': '2025-10-01 04:20:29.448394', 'step': 6348, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:29.480681', 'step': 6348, 'epoch': 1} {'type': 'loss', 'content': 0.13099165260791779, 'timestamp': '2025-10-01 04:20:29.483051', 'step': 6349, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:20:29.518296', 'step': 6349, 'epoch': 1} {'type': 'loss', 'content': 0.17342060804367065, 'timestamp': '2025-10-01 04:20:29.520221', 'step': 6350, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:20:29.554623', 'step': 6350, 'epoch': 1} {'type': 'loss', 'content': 0.13350138068199158, 'timestamp': '2025-10-01 04:20:29.556270', 'step': 6351, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:20:29.587862', 'step': 6351, 'epoch': 1} {'type': 'loss', 'content': 0.13647955656051636, 'timestamp': '2025-10-01 04:20:29.611310', 'step': 6352, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:20:29.648602', 'step': 6352, 'epoch': 1} {'type': 'loss', 'content': 0.14737050235271454, 'timestamp': '2025-10-01 04:20:29.650466', 'step': 6353, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:20:29.681057', 'step': 6353, 'epoch': 1} {'type': 'loss', 'content': 0.15122130513191223, 'timestamp': '2025-10-01 04:20:29.687205', 'step': 6354, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:20:29.726516', 'step': 6354, 'epoch': 1} {'type': 'loss', 'content': 0.16999807953834534, 'timestamp': '2025-10-01 04:20:29.728343', 'step': 6355, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:20:29.766653', 'step': 6355, 'epoch': 1} {'type': 'loss', 'content': 0.14835821092128754, 'timestamp': '2025-10-01 04:20:29.790067', 'step': 6356, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:20:29.820351', 'step': 6356, 'epoch': 1} {'type': 'loss', 'content': 0.14285911619663239, 'timestamp': '2025-10-01 04:20:29.822639', 'step': 6357, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:20:29.852763', 'step': 6357, 'epoch': 1} {'type': 'loss', 'content': 0.1541883647441864, 'timestamp': '2025-10-01 04:20:29.854768', 'step': 6358, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:20:29.889007', 'step': 6358, 'epoch': 1} {'type': 'loss', 'content': 0.13071170449256897, 'timestamp': '2025-10-01 04:20:29.891296', 'step': 6359, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:20:29.921659', 'step': 6359, 'epoch': 1} {'type': 'loss', 'content': 0.25442686676979065, 'timestamp': '2025-10-01 04:20:29.945243', 'step': 6360, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:20:29.978147', 'step': 6360, 'epoch': 1} {'type': 'loss', 'content': 0.0981040671467781, 'timestamp': '2025-10-01 04:20:29.980252', 'step': 6361, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:20:30.021954', 'step': 6361, 'epoch': 1} {'type': 'loss', 'content': 0.14409615099430084, 'timestamp': '2025-10-01 04:20:30.023967', 'step': 6362, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:20:30.055401', 'step': 6362, 'epoch': 1} {'type': 'loss', 'content': 0.12824168801307678, 'timestamp': '2025-10-01 04:20:30.057396', 'step': 6363, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:20:30.088021', 'step': 6363, 'epoch': 1} {'type': 'loss', 'content': 0.20779645442962646, 'timestamp': '2025-10-01 04:20:30.111338', 'step': 6364, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:30.142226', 'step': 6364, 'epoch': 1} {'type': 'loss', 'content': 0.17913584411144257, 'timestamp': '2025-10-01 04:20:30.143948', 'step': 6365, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:30.177379', 'step': 6365, 'epoch': 1} {'type': 'loss', 'content': 0.12686383724212646, 'timestamp': '2025-10-01 04:20:30.179061', 'step': 6366, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:20:30.215493', 'step': 6366, 'epoch': 1} {'type': 'loss', 'content': 0.11362182348966599, 'timestamp': '2025-10-01 04:20:30.217949', 'step': 6367, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:20:30.250865', 'step': 6367, 'epoch': 1} {'type': 'loss', 'content': 0.1279607117176056, 'timestamp': '2025-10-01 04:20:30.274271', 'step': 6368, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:20:30.310509', 'step': 6368, 'epoch': 1} {'type': 'loss', 'content': 0.1628928929567337, 'timestamp': '2025-10-01 04:20:30.312589', 'step': 6369, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:20:30.345934', 'step': 6369, 'epoch': 1} {'type': 'loss', 'content': 0.16705738008022308, 'timestamp': '2025-10-01 04:20:30.347812', 'step': 6370, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:30.389313', 'step': 6370, 'epoch': 1} {'type': 'loss', 'content': 0.1500518023967743, 'timestamp': '2025-10-01 04:20:30.391335', 'step': 6371, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:30.424299', 'step': 6371, 'epoch': 1} {'type': 'loss', 'content': 0.07643629610538483, 'timestamp': '2025-10-01 04:20:30.448022', 'step': 6372, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:30.481931', 'step': 6372, 'epoch': 1} {'type': 'loss', 'content': 0.18125057220458984, 'timestamp': '2025-10-01 04:20:30.485263', 'step': 6373, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:20:30.516932', 'step': 6373, 'epoch': 1} {'type': 'loss', 'content': 0.10265680402517319, 'timestamp': '2025-10-01 04:20:30.518759', 'step': 6374, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:20:30.549096', 'step': 6374, 'epoch': 1} {'type': 'loss', 'content': 0.15401825308799744, 'timestamp': '2025-10-01 04:20:30.551088', 'step': 6375, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:20:30.582256', 'step': 6375, 'epoch': 1} {'type': 'loss', 'content': 0.20588049292564392, 'timestamp': '2025-10-01 04:20:30.606295', 'step': 6376, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:20:30.640777', 'step': 6376, 'epoch': 1} {'type': 'loss', 'content': 0.17129501700401306, 'timestamp': '2025-10-01 04:20:30.642759', 'step': 6377, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:30.673424', 'step': 6377, 'epoch': 1} {'type': 'loss', 'content': 0.11308995634317398, 'timestamp': '2025-10-01 04:20:30.675706', 'step': 6378, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:20:30.712062', 'step': 6378, 'epoch': 1} {'type': 'loss', 'content': 0.16722668707370758, 'timestamp': '2025-10-01 04:20:30.714807', 'step': 6379, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:20:30.746168', 'step': 6379, 'epoch': 1} {'type': 'loss', 'content': 0.16979894042015076, 'timestamp': '2025-10-01 04:20:30.769609', 'step': 6380, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:30.800984', 'step': 6380, 'epoch': 1} {'type': 'loss', 'content': 0.11209047585725784, 'timestamp': '2025-10-01 04:20:30.803579', 'step': 6381, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:20:30.843215', 'step': 6381, 'epoch': 1} {'type': 'loss', 'content': 0.14982792735099792, 'timestamp': '2025-10-01 04:20:30.845807', 'step': 6382, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:20:30.879141', 'step': 6382, 'epoch': 1} {'type': 'loss', 'content': 0.12047485262155533, 'timestamp': '2025-10-01 04:20:30.881105', 'step': 6383, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:20:30.913169', 'step': 6383, 'epoch': 1} {'type': 'loss', 'content': 0.10375130921602249, 'timestamp': '2025-10-01 04:20:30.936642', 'step': 6384, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:30.966779', 'step': 6384, 'epoch': 1} {'type': 'loss', 'content': 0.2011345773935318, 'timestamp': '2025-10-01 04:20:30.968890', 'step': 6385, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:31.001239', 'step': 6385, 'epoch': 1} {'type': 'loss', 'content': 0.21334277093410492, 'timestamp': '2025-10-01 04:20:31.003339', 'step': 6386, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:20:31.035659', 'step': 6386, 'epoch': 1} {'type': 'loss', 'content': 0.20894554257392883, 'timestamp': '2025-10-01 04:20:31.038011', 'step': 6387, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:31.069143', 'step': 6387, 'epoch': 1} {'type': 'loss', 'content': 0.2128184586763382, 'timestamp': '2025-10-01 04:20:31.092591', 'step': 6388, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:20:31.122058', 'step': 6388, 'epoch': 1} {'type': 'loss', 'content': 0.1674651950597763, 'timestamp': '2025-10-01 04:20:31.123841', 'step': 6389, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:20:31.155227', 'step': 6389, 'epoch': 1} {'type': 'loss', 'content': 0.13058720529079437, 'timestamp': '2025-10-01 04:20:31.157470', 'step': 6390, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:20:31.188793', 'step': 6390, 'epoch': 1} {'type': 'loss', 'content': 0.11719245463609695, 'timestamp': '2025-10-01 04:20:31.190791', 'step': 6391, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:20:31.223584', 'step': 6391, 'epoch': 1} {'type': 'loss', 'content': 0.23124000430107117, 'timestamp': '2025-10-01 04:20:31.246954', 'step': 6392, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:31.278597', 'step': 6392, 'epoch': 1} {'type': 'loss', 'content': 0.13611970841884613, 'timestamp': '2025-10-01 04:20:31.280635', 'step': 6393, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:20:31.318467', 'step': 6393, 'epoch': 1} {'type': 'loss', 'content': 0.1671741008758545, 'timestamp': '2025-10-01 04:20:31.320529', 'step': 6394, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:20:31.352446', 'step': 6394, 'epoch': 1} {'type': 'loss', 'content': 0.14331118762493134, 'timestamp': '2025-10-01 04:20:31.354525', 'step': 6395, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:20:31.385529', 'step': 6395, 'epoch': 1} {'type': 'loss', 'content': 0.20829448103904724, 'timestamp': '2025-10-01 04:20:31.411146', 'step': 6396, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:20:31.443308', 'step': 6396, 'epoch': 1} {'type': 'loss', 'content': 0.2241867333650589, 'timestamp': '2025-10-01 04:20:31.445307', 'step': 6397, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:31.477547', 'step': 6397, 'epoch': 1} {'type': 'loss', 'content': 0.18036039173603058, 'timestamp': '2025-10-01 04:20:31.479483', 'step': 6398, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:20:31.513165', 'step': 6398, 'epoch': 1} {'type': 'loss', 'content': 0.2112714946269989, 'timestamp': '2025-10-01 04:20:31.515082', 'step': 6399, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:31.546055', 'step': 6399, 'epoch': 1} {'type': 'loss', 'content': 0.11007707566022873, 'timestamp': '2025-10-01 04:20:31.569237', 'step': 6400, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:31.598447', 'step': 6400, 'epoch': 1} {'type': 'loss', 'content': 0.16821175813674927, 'timestamp': '2025-10-01 04:20:31.600266', 'step': 6401, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:31.631430', 'step': 6401, 'epoch': 1} {'type': 'loss', 'content': 0.17998117208480835, 'timestamp': '2025-10-01 04:20:31.633620', 'step': 6402, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:31.667055', 'step': 6402, 'epoch': 1} {'type': 'loss', 'content': 0.15947312116622925, 'timestamp': '2025-10-01 04:20:31.669080', 'step': 6403, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:20:31.701062', 'step': 6403, 'epoch': 1} {'type': 'loss', 'content': 0.13448186218738556, 'timestamp': '2025-10-01 04:20:31.724434', 'step': 6404, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:20:31.755247', 'step': 6404, 'epoch': 1} {'type': 'loss', 'content': 0.21385034918785095, 'timestamp': '2025-10-01 04:20:31.757293', 'step': 6405, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:31.789338', 'step': 6405, 'epoch': 1} {'type': 'loss', 'content': 0.07356738299131393, 'timestamp': '2025-10-01 04:20:31.791539', 'step': 6406, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:20:31.822307', 'step': 6406, 'epoch': 1} {'type': 'loss', 'content': 0.17945805191993713, 'timestamp': '2025-10-01 04:20:31.824294', 'step': 6407, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:20:31.855280', 'step': 6407, 'epoch': 1} {'type': 'loss', 'content': 0.11188911646604538, 'timestamp': '2025-10-01 04:20:31.879129', 'step': 6408, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:31.910405', 'step': 6408, 'epoch': 1} {'type': 'loss', 'content': 0.12211159616708755, 'timestamp': '2025-10-01 04:20:31.912496', 'step': 6409, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:31.943293', 'step': 6409, 'epoch': 1} {'type': 'loss', 'content': 0.14100338518619537, 'timestamp': '2025-10-01 04:20:31.945277', 'step': 6410, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:31.978297', 'step': 6410, 'epoch': 1} {'type': 'loss', 'content': 0.08542127162218094, 'timestamp': '2025-10-01 04:20:31.980333', 'step': 6411, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:20:32.010878', 'step': 6411, 'epoch': 1} {'type': 'loss', 'content': 0.12689228355884552, 'timestamp': '2025-10-01 04:20:32.034302', 'step': 6412, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:20:32.065902', 'step': 6412, 'epoch': 1} {'type': 'loss', 'content': 0.1191663071513176, 'timestamp': '2025-10-01 04:20:32.068674', 'step': 6413, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:20:32.100574', 'step': 6413, 'epoch': 1} {'type': 'loss', 'content': 0.1846155971288681, 'timestamp': '2025-10-01 04:20:32.102744', 'step': 6414, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:20:32.134593', 'step': 6414, 'epoch': 1} {'type': 'loss', 'content': 0.12828323245048523, 'timestamp': '2025-10-01 04:20:32.148322', 'step': 6415, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:32.180691', 'step': 6415, 'epoch': 1} {'type': 'loss', 'content': 0.12312255054712296, 'timestamp': '2025-10-01 04:20:32.204061', 'step': 6416, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:20:32.235193', 'step': 6416, 'epoch': 1} {'type': 'loss', 'content': 0.11371240764856339, 'timestamp': '2025-10-01 04:20:32.237508', 'step': 6417, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:32.270271', 'step': 6417, 'epoch': 1} {'type': 'loss', 'content': 0.08004322648048401, 'timestamp': '2025-10-01 04:20:32.272193', 'step': 6418, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:32.305155', 'step': 6418, 'epoch': 1} {'type': 'loss', 'content': 0.18913204967975616, 'timestamp': '2025-10-01 04:20:32.307827', 'step': 6419, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:32.338373', 'step': 6419, 'epoch': 1} {'type': 'loss', 'content': 0.17655667662620544, 'timestamp': '2025-10-01 04:20:32.362010', 'step': 6420, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:32.402859', 'step': 6420, 'epoch': 1} {'type': 'loss', 'content': 0.17250749468803406, 'timestamp': '2025-10-01 04:20:32.405135', 'step': 6421, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:32.436698', 'step': 6421, 'epoch': 1} {'type': 'loss', 'content': 0.23175419867038727, 'timestamp': '2025-10-01 04:20:32.438614', 'step': 6422, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:20:32.472512', 'step': 6422, 'epoch': 1} {'type': 'loss', 'content': 0.13091793656349182, 'timestamp': '2025-10-01 04:20:32.474862', 'step': 6423, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:20:32.507645', 'step': 6423, 'epoch': 1} {'type': 'loss', 'content': 0.11328350007534027, 'timestamp': '2025-10-01 04:20:32.531012', 'step': 6424, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:20:32.561481', 'step': 6424, 'epoch': 1} {'type': 'loss', 'content': 0.04835282266139984, 'timestamp': '2025-10-01 04:20:32.563673', 'step': 6425, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:20:32.594802', 'step': 6425, 'epoch': 1} {'type': 'loss', 'content': 0.12899406254291534, 'timestamp': '2025-10-01 04:20:32.596915', 'step': 6426, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:20:32.630340', 'step': 6426, 'epoch': 1} {'type': 'loss', 'content': 0.1487763375043869, 'timestamp': '2025-10-01 04:20:32.632408', 'step': 6427, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:20:32.664689', 'step': 6427, 'epoch': 1} {'type': 'loss', 'content': 0.19766701757907867, 'timestamp': '2025-10-01 04:20:32.688402', 'step': 6428, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:32.719538', 'step': 6428, 'epoch': 1} {'type': 'loss', 'content': 0.12384486198425293, 'timestamp': '2025-10-01 04:20:32.721428', 'step': 6429, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:20:32.759688', 'step': 6429, 'epoch': 1} {'type': 'loss', 'content': 0.17305220663547516, 'timestamp': '2025-10-01 04:20:32.762463', 'step': 6430, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:32.796007', 'step': 6430, 'epoch': 1} {'type': 'loss', 'content': 0.1570892482995987, 'timestamp': '2025-10-01 04:20:32.798494', 'step': 6431, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:20:32.831228', 'step': 6431, 'epoch': 1} {'type': 'loss', 'content': 0.18856753408908844, 'timestamp': '2025-10-01 04:20:32.854892', 'step': 6432, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:20:32.887479', 'step': 6432, 'epoch': 1} {'type': 'loss', 'content': 0.14430168271064758, 'timestamp': '2025-10-01 04:20:32.889972', 'step': 6433, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:20:32.922629', 'step': 6433, 'epoch': 1} {'type': 'loss', 'content': 0.07993289083242416, 'timestamp': '2025-10-01 04:20:32.924545', 'step': 6434, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:20:32.957949', 'step': 6434, 'epoch': 1} {'type': 'loss', 'content': 0.178328275680542, 'timestamp': '2025-10-01 04:20:32.960116', 'step': 6435, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:32.992289', 'step': 6435, 'epoch': 1} {'type': 'loss', 'content': 0.15630337595939636, 'timestamp': '2025-10-01 04:20:33.016153', 'step': 6436, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:20:33.047185', 'step': 6436, 'epoch': 1} {'type': 'loss', 'content': 0.17286193370819092, 'timestamp': '2025-10-01 04:20:33.049370', 'step': 6437, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:33.083080', 'step': 6437, 'epoch': 1} {'type': 'loss', 'content': 0.1374119073152542, 'timestamp': '2025-10-01 04:20:33.085315', 'step': 6438, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:33.120125', 'step': 6438, 'epoch': 1} {'type': 'loss', 'content': 0.13394342362880707, 'timestamp': '2025-10-01 04:20:33.122082', 'step': 6439, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:33.156280', 'step': 6439, 'epoch': 1} {'type': 'loss', 'content': 0.15160682797431946, 'timestamp': '2025-10-01 04:20:33.179841', 'step': 6440, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:20:33.209829', 'step': 6440, 'epoch': 1} {'type': 'loss', 'content': 0.1632155179977417, 'timestamp': '2025-10-01 04:20:33.212544', 'step': 6441, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:20:33.245074', 'step': 6441, 'epoch': 1} {'type': 'loss', 'content': 0.14890950918197632, 'timestamp': '2025-10-01 04:20:33.247189', 'step': 6442, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:20:33.281361', 'step': 6442, 'epoch': 1} {'type': 'loss', 'content': 0.07547170668840408, 'timestamp': '2025-10-01 04:20:33.283499', 'step': 6443, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:33.316186', 'step': 6443, 'epoch': 1} {'type': 'loss', 'content': 0.15949052572250366, 'timestamp': '2025-10-01 04:20:33.340014', 'step': 6444, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:20:33.372910', 'step': 6444, 'epoch': 1} {'type': 'loss', 'content': 0.1696563959121704, 'timestamp': '2025-10-01 04:20:33.375077', 'step': 6445, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:33.405121', 'step': 6445, 'epoch': 1} {'type': 'loss', 'content': 0.2248436063528061, 'timestamp': '2025-10-01 04:20:33.407091', 'step': 6446, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:20:33.438329', 'step': 6446, 'epoch': 1} {'type': 'loss', 'content': 0.22023910284042358, 'timestamp': '2025-10-01 04:20:33.440525', 'step': 6447, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:20:33.472383', 'step': 6447, 'epoch': 1} {'type': 'loss', 'content': 0.2306620180606842, 'timestamp': '2025-10-01 04:20:33.495961', 'step': 6448, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:33.528799', 'step': 6448, 'epoch': 1} {'type': 'loss', 'content': 0.1417921483516693, 'timestamp': '2025-10-01 04:20:33.530732', 'step': 6449, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:20:33.562202', 'step': 6449, 'epoch': 1} {'type': 'loss', 'content': 0.11711990833282471, 'timestamp': '2025-10-01 04:20:33.565145', 'step': 6450, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:33.595998', 'step': 6450, 'epoch': 1} {'type': 'loss', 'content': 0.1253172904253006, 'timestamp': '2025-10-01 04:20:33.598126', 'step': 6451, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:20:33.629986', 'step': 6451, 'epoch': 1} {'type': 'loss', 'content': 0.13351289927959442, 'timestamp': '2025-10-01 04:20:33.653478', 'step': 6452, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:33.685460', 'step': 6452, 'epoch': 1} {'type': 'loss', 'content': 0.1585976481437683, 'timestamp': '2025-10-01 04:20:33.687452', 'step': 6453, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:33.719066', 'step': 6453, 'epoch': 1} {'type': 'loss', 'content': 0.13388827443122864, 'timestamp': '2025-10-01 04:20:33.721208', 'step': 6454, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:20:33.753262', 'step': 6454, 'epoch': 1} {'type': 'loss', 'content': 0.13903802633285522, 'timestamp': '2025-10-01 04:20:33.755858', 'step': 6455, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:20:33.786918', 'step': 6455, 'epoch': 1} {'type': 'loss', 'content': 0.1367018222808838, 'timestamp': '2025-10-01 04:20:33.810467', 'step': 6456, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:20:33.840918', 'step': 6456, 'epoch': 1} {'type': 'loss', 'content': 0.13991032540798187, 'timestamp': '2025-10-01 04:20:33.842945', 'step': 6457, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:20:33.887992', 'step': 6457, 'epoch': 1} {'type': 'loss', 'content': 0.09055622667074203, 'timestamp': '2025-10-01 04:20:33.889925', 'step': 6458, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:33.934306', 'step': 6458, 'epoch': 1} {'type': 'loss', 'content': 0.17569604516029358, 'timestamp': '2025-10-01 04:20:33.936597', 'step': 6459, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:20:33.970003', 'step': 6459, 'epoch': 1} {'type': 'loss', 'content': 0.13061226904392242, 'timestamp': '2025-10-01 04:20:33.993769', 'step': 6460, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:34.031600', 'step': 6460, 'epoch': 1} {'type': 'loss', 'content': 0.15915848314762115, 'timestamp': '2025-10-01 04:20:34.033660', 'step': 6461, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:20:34.068260', 'step': 6461, 'epoch': 1} {'type': 'loss', 'content': 0.0997341200709343, 'timestamp': '2025-10-01 04:20:34.070591', 'step': 6462, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:20:34.102661', 'step': 6462, 'epoch': 1} {'type': 'loss', 'content': 0.21708235144615173, 'timestamp': '2025-10-01 04:20:34.105269', 'step': 6463, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:20:34.137925', 'step': 6463, 'epoch': 1} {'type': 'loss', 'content': 0.17577049136161804, 'timestamp': '2025-10-01 04:20:34.163926', 'step': 6464, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:34.195418', 'step': 6464, 'epoch': 1} {'type': 'loss', 'content': 0.17174093425273895, 'timestamp': '2025-10-01 04:20:34.197469', 'step': 6465, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:34.230138', 'step': 6465, 'epoch': 1} {'type': 'loss', 'content': 0.0906803235411644, 'timestamp': '2025-10-01 04:20:34.231986', 'step': 6466, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:20:34.267328', 'step': 6466, 'epoch': 1} {'type': 'loss', 'content': 0.09794306755065918, 'timestamp': '2025-10-01 04:20:34.269220', 'step': 6467, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:34.300031', 'step': 6467, 'epoch': 1} {'type': 'loss', 'content': 0.25645390152931213, 'timestamp': '2025-10-01 04:20:34.323650', 'step': 6468, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:34.355313', 'step': 6468, 'epoch': 1} {'type': 'loss', 'content': 0.14630194008350372, 'timestamp': '2025-10-01 04:20:34.357549', 'step': 6469, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:34.388008', 'step': 6469, 'epoch': 1} {'type': 'loss', 'content': 0.088096022605896, 'timestamp': '2025-10-01 04:20:34.390547', 'step': 6470, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:20:34.423494', 'step': 6470, 'epoch': 1} {'type': 'loss', 'content': 0.14361071586608887, 'timestamp': '2025-10-01 04:20:34.425644', 'step': 6471, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:34.459505', 'step': 6471, 'epoch': 1} {'type': 'loss', 'content': 0.07576259970664978, 'timestamp': '2025-10-01 04:20:34.483002', 'step': 6472, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:34.517879', 'step': 6472, 'epoch': 1} {'type': 'loss', 'content': 0.10388240218162537, 'timestamp': '2025-10-01 04:20:34.520882', 'step': 6473, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:20:34.555566', 'step': 6473, 'epoch': 1} {'type': 'loss', 'content': 0.10270822048187256, 'timestamp': '2025-10-01 04:20:34.558022', 'step': 6474, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:20:34.591588', 'step': 6474, 'epoch': 1} {'type': 'loss', 'content': 0.20545169711112976, 'timestamp': '2025-10-01 04:20:34.594014', 'step': 6475, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:20:34.624736', 'step': 6475, 'epoch': 1} {'type': 'loss', 'content': 0.10560251772403717, 'timestamp': '2025-10-01 04:20:34.648707', 'step': 6476, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:34.680850', 'step': 6476, 'epoch': 1} {'type': 'loss', 'content': 0.12747205793857574, 'timestamp': '2025-10-01 04:20:34.682754', 'step': 6477, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:34.715303', 'step': 6477, 'epoch': 1} {'type': 'loss', 'content': 0.15840910375118256, 'timestamp': '2025-10-01 04:20:34.717298', 'step': 6478, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:34.748156', 'step': 6478, 'epoch': 1} {'type': 'loss', 'content': 0.19032829999923706, 'timestamp': '2025-10-01 04:20:34.749986', 'step': 6479, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:34.783218', 'step': 6479, 'epoch': 1} {'type': 'loss', 'content': 0.1360720843076706, 'timestamp': '2025-10-01 04:20:34.806779', 'step': 6480, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:34.838229', 'step': 6480, 'epoch': 1} {'type': 'loss', 'content': 0.17916317284107208, 'timestamp': '2025-10-01 04:20:34.840317', 'step': 6481, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:20:34.873177', 'step': 6481, 'epoch': 1} {'type': 'loss', 'content': 0.14701831340789795, 'timestamp': '2025-10-01 04:20:34.875198', 'step': 6482, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:20:34.915504', 'step': 6482, 'epoch': 1} {'type': 'loss', 'content': 0.1525837928056717, 'timestamp': '2025-10-01 04:20:34.917646', 'step': 6483, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:34.951606', 'step': 6483, 'epoch': 1} {'type': 'loss', 'content': 0.12282794713973999, 'timestamp': '2025-10-01 04:20:34.975127', 'step': 6484, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:20:35.007566', 'step': 6484, 'epoch': 1} {'type': 'loss', 'content': 0.189985454082489, 'timestamp': '2025-10-01 04:20:35.009558', 'step': 6485, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:20:35.048299', 'step': 6485, 'epoch': 1} {'type': 'loss', 'content': 0.12624551355838776, 'timestamp': '2025-10-01 04:20:35.051022', 'step': 6486, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:35.085701', 'step': 6486, 'epoch': 1} {'type': 'loss', 'content': 0.1596687287092209, 'timestamp': '2025-10-01 04:20:35.087579', 'step': 6487, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:20:35.125049', 'step': 6487, 'epoch': 1} {'type': 'loss', 'content': 0.12548650801181793, 'timestamp': '2025-10-01 04:20:35.148825', 'step': 6488, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:20:35.181396', 'step': 6488, 'epoch': 1} {'type': 'loss', 'content': 0.15437540411949158, 'timestamp': '2025-10-01 04:20:35.183659', 'step': 6489, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:20:35.215841', 'step': 6489, 'epoch': 1} {'type': 'loss', 'content': 0.13565842807292938, 'timestamp': '2025-10-01 04:20:35.218117', 'step': 6490, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:20:35.250447', 'step': 6490, 'epoch': 1} {'type': 'loss', 'content': 0.13209785521030426, 'timestamp': '2025-10-01 04:20:35.252502', 'step': 6491, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:20:35.286499', 'step': 6491, 'epoch': 1} {'type': 'loss', 'content': 0.1320672333240509, 'timestamp': '2025-10-01 04:20:35.311736', 'step': 6492, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:35.342848', 'step': 6492, 'epoch': 1} {'type': 'loss', 'content': 0.18871307373046875, 'timestamp': '2025-10-01 04:20:35.345086', 'step': 6493, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:20:35.376809', 'step': 6493, 'epoch': 1} {'type': 'loss', 'content': 0.14918000996112823, 'timestamp': '2025-10-01 04:20:35.381410', 'step': 6494, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:20:35.413149', 'step': 6494, 'epoch': 1} {'type': 'loss', 'content': 0.1576438546180725, 'timestamp': '2025-10-01 04:20:35.415205', 'step': 6495, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:35.446099', 'step': 6495, 'epoch': 1} {'type': 'loss', 'content': 0.13994479179382324, 'timestamp': '2025-10-01 04:20:35.469720', 'step': 6496, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:20:35.501195', 'step': 6496, 'epoch': 1} {'type': 'loss', 'content': 0.16799932718276978, 'timestamp': '2025-10-01 04:20:35.503828', 'step': 6497, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:20:35.540853', 'step': 6497, 'epoch': 1} {'type': 'loss', 'content': 0.1634230613708496, 'timestamp': '2025-10-01 04:20:35.542936', 'step': 6498, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:20:35.574691', 'step': 6498, 'epoch': 1} {'type': 'loss', 'content': 0.22922849655151367, 'timestamp': '2025-10-01 04:20:35.576971', 'step': 6499, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:20:35.607754', 'step': 6499, 'epoch': 1} {'type': 'loss', 'content': 0.2677031457424164, 'timestamp': '2025-10-01 04:20:35.631370', 'step': 6500, 'epoch': 1} {'type': 'info', 'content': 'Checkpoint saved at step 6500', 'timestamp': '2025-10-01 04:20:41.586573', 'step': 6500, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:41.627381', 'step': 6500, 'epoch': 1} {'type': 'loss', 'content': 0.20857477188110352, 'timestamp': '2025-10-01 04:20:41.629300', 'step': 6501, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:20:41.678364', 'step': 6501, 'epoch': 1} {'type': 'loss', 'content': 0.19200393557548523, 'timestamp': '2025-10-01 04:20:41.680709', 'step': 6502, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:20:41.725293', 'step': 6502, 'epoch': 1} {'type': 'loss', 'content': 0.17950467765331268, 'timestamp': '2025-10-01 04:20:41.727568', 'step': 6503, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:20:41.767659', 'step': 6503, 'epoch': 1} {'type': 'loss', 'content': 0.19742248952388763, 'timestamp': '2025-10-01 04:20:41.791500', 'step': 6504, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:20:41.834460', 'step': 6504, 'epoch': 1} {'type': 'loss', 'content': 0.13467344641685486, 'timestamp': '2025-10-01 04:20:41.836524', 'step': 6505, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:20:41.869537', 'step': 6505, 'epoch': 1} {'type': 'loss', 'content': 0.08473976701498032, 'timestamp': '2025-10-01 04:20:41.871600', 'step': 6506, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:41.918002', 'step': 6506, 'epoch': 1} {'type': 'loss', 'content': 0.14652368426322937, 'timestamp': '2025-10-01 04:20:41.920163', 'step': 6507, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:41.967087', 'step': 6507, 'epoch': 1} {'type': 'loss', 'content': 0.17009833455085754, 'timestamp': '2025-10-01 04:20:41.990708', 'step': 6508, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:42.039313', 'step': 6508, 'epoch': 1} {'type': 'loss', 'content': 0.16745944321155548, 'timestamp': '2025-10-01 04:20:42.041285', 'step': 6509, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:20:42.094974', 'step': 6509, 'epoch': 1} {'type': 'loss', 'content': 0.11003723740577698, 'timestamp': '2025-10-01 04:20:42.096973', 'step': 6510, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:20:42.132848', 'step': 6510, 'epoch': 1} {'type': 'loss', 'content': 0.13196080923080444, 'timestamp': '2025-10-01 04:20:42.135500', 'step': 6511, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:20:42.177122', 'step': 6511, 'epoch': 1} {'type': 'loss', 'content': 0.20040105283260345, 'timestamp': '2025-10-01 04:20:42.200717', 'step': 6512, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:20:42.244825', 'step': 6512, 'epoch': 1} {'type': 'loss', 'content': 0.10247105360031128, 'timestamp': '2025-10-01 04:20:42.246827', 'step': 6513, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:20:42.300853', 'step': 6513, 'epoch': 1} {'type': 'loss', 'content': 0.1731264889240265, 'timestamp': '2025-10-01 04:20:42.302895', 'step': 6514, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:20:42.347963', 'step': 6514, 'epoch': 1} {'type': 'loss', 'content': 0.15067006647586823, 'timestamp': '2025-10-01 04:20:42.349981', 'step': 6515, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:20:42.392116', 'step': 6515, 'epoch': 1} {'type': 'loss', 'content': 0.1886838674545288, 'timestamp': '2025-10-01 04:20:42.415611', 'step': 6516, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:20:42.480999', 'step': 6516, 'epoch': 1} {'type': 'loss', 'content': 0.10903653502464294, 'timestamp': '2025-10-01 04:20:42.483040', 'step': 6517, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:42.526550', 'step': 6517, 'epoch': 1} {'type': 'loss', 'content': 0.1489323526620865, 'timestamp': '2025-10-01 04:20:42.528692', 'step': 6518, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:20:42.562676', 'step': 6518, 'epoch': 1} {'type': 'loss', 'content': 0.17473791539669037, 'timestamp': '2025-10-01 04:20:42.564739', 'step': 6519, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:42.598964', 'step': 6519, 'epoch': 1} {'type': 'loss', 'content': 0.12200659513473511, 'timestamp': '2025-10-01 04:20:42.622481', 'step': 6520, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:42.679280', 'step': 6520, 'epoch': 1} {'type': 'loss', 'content': 0.10902564227581024, 'timestamp': '2025-10-01 04:20:42.681340', 'step': 6521, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:20:42.715129', 'step': 6521, 'epoch': 1} {'type': 'loss', 'content': 0.17529213428497314, 'timestamp': '2025-10-01 04:20:42.717106', 'step': 6522, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:20:42.748607', 'step': 6522, 'epoch': 1} {'type': 'loss', 'content': 0.12492727488279343, 'timestamp': '2025-10-01 04:20:42.750583', 'step': 6523, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:42.784068', 'step': 6523, 'epoch': 1} {'type': 'loss', 'content': 0.16557316482067108, 'timestamp': '2025-10-01 04:20:42.807540', 'step': 6524, 'epoch': 1} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 949202279808}], 'timestamp': '2025-10-01 04:20:56.187999', 'step': 6524, 'epoch': 1} {'type': 'pplx', 'content': 9030.195141426062, 'timestamp': '2025-10-01 04:20:56.190943', 'step': 6524, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:56.223930', 'step': 6524, 'epoch': 1} {'type': 'loss', 'content': 0.1131356731057167, 'timestamp': '2025-10-01 04:20:56.225963', 'step': 6525, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:56.286907', 'step': 6525, 'epoch': 1} {'type': 'loss', 'content': 0.07240792363882065, 'timestamp': '2025-10-01 04:20:56.288984', 'step': 6526, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:20:56.336373', 'step': 6526, 'epoch': 1} {'type': 'loss', 'content': 0.1963185966014862, 'timestamp': '2025-10-01 04:20:56.338640', 'step': 6527, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:56.382862', 'step': 6527, 'epoch': 1} {'type': 'loss', 'content': 0.09837973862886429, 'timestamp': '2025-10-01 04:20:56.410480', 'step': 6528, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:20:56.444031', 'step': 6528, 'epoch': 1} {'type': 'loss', 'content': 0.15035676956176758, 'timestamp': '2025-10-01 04:20:56.446092', 'step': 6529, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:56.479412', 'step': 6529, 'epoch': 1} {'type': 'loss', 'content': 0.2616801857948303, 'timestamp': '2025-10-01 04:20:56.481408', 'step': 6530, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:20:56.518457', 'step': 6530, 'epoch': 1} {'type': 'loss', 'content': 0.1754959523677826, 'timestamp': '2025-10-01 04:20:56.520621', 'step': 6531, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:20:56.553523', 'step': 6531, 'epoch': 1} {'type': 'loss', 'content': 0.1257135272026062, 'timestamp': '2025-10-01 04:20:56.578318', 'step': 6532, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:20:56.632057', 'step': 6532, 'epoch': 1} {'type': 'loss', 'content': 0.09842849522829056, 'timestamp': '2025-10-01 04:20:56.634088', 'step': 6533, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:56.668026', 'step': 6533, 'epoch': 1} {'type': 'loss', 'content': 0.1413661390542984, 'timestamp': '2025-10-01 04:20:56.670011', 'step': 6534, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:20:56.712642', 'step': 6534, 'epoch': 1} {'type': 'loss', 'content': 0.08466643840074539, 'timestamp': '2025-10-01 04:20:56.714636', 'step': 6535, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:56.748633', 'step': 6535, 'epoch': 1} {'type': 'loss', 'content': 0.12481901049613953, 'timestamp': '2025-10-01 04:20:56.772301', 'step': 6536, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:20:56.817220', 'step': 6536, 'epoch': 1} {'type': 'loss', 'content': 0.061320848762989044, 'timestamp': '2025-10-01 04:20:56.819189', 'step': 6537, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:56.866296', 'step': 6537, 'epoch': 1} {'type': 'loss', 'content': 0.1532188355922699, 'timestamp': '2025-10-01 04:20:56.868153', 'step': 6538, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:56.904016', 'step': 6538, 'epoch': 1} {'type': 'loss', 'content': 0.17371375858783722, 'timestamp': '2025-10-01 04:20:56.906198', 'step': 6539, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:56.950838', 'step': 6539, 'epoch': 1} {'type': 'loss', 'content': 0.15369437634944916, 'timestamp': '2025-10-01 04:20:56.974855', 'step': 6540, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:57.030638', 'step': 6540, 'epoch': 1} {'type': 'loss', 'content': 0.14397133886814117, 'timestamp': '2025-10-01 04:20:57.032980', 'step': 6541, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:20:57.067877', 'step': 6541, 'epoch': 1} {'type': 'loss', 'content': 0.13589255511760712, 'timestamp': '2025-10-01 04:20:57.070040', 'step': 6542, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:20:57.111580', 'step': 6542, 'epoch': 1} {'type': 'loss', 'content': 0.18781711161136627, 'timestamp': '2025-10-01 04:20:57.113747', 'step': 6543, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:20:57.146961', 'step': 6543, 'epoch': 1} {'type': 'loss', 'content': 0.16064639389514923, 'timestamp': '2025-10-01 04:20:57.170739', 'step': 6544, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:20:57.204514', 'step': 6544, 'epoch': 1} {'type': 'loss', 'content': 0.08817123621702194, 'timestamp': '2025-10-01 04:20:57.206637', 'step': 6545, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:20:57.253264', 'step': 6545, 'epoch': 1} {'type': 'loss', 'content': 0.12449950724840164, 'timestamp': '2025-10-01 04:20:57.255793', 'step': 6546, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:20:57.291025', 'step': 6546, 'epoch': 1} {'type': 'loss', 'content': 0.057624299079179764, 'timestamp': '2025-10-01 04:20:57.293040', 'step': 6547, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:57.327846', 'step': 6547, 'epoch': 1} {'type': 'loss', 'content': 0.10535698384046555, 'timestamp': '2025-10-01 04:20:57.352073', 'step': 6548, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:57.387784', 'step': 6548, 'epoch': 1} {'type': 'loss', 'content': 0.20542515814304352, 'timestamp': '2025-10-01 04:20:57.390672', 'step': 6549, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:20:57.423336', 'step': 6549, 'epoch': 1} {'type': 'loss', 'content': 0.18987338244915009, 'timestamp': '2025-10-01 04:20:57.425371', 'step': 6550, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:20:57.466770', 'step': 6550, 'epoch': 1} {'type': 'loss', 'content': 0.19479821622371674, 'timestamp': '2025-10-01 04:20:57.468818', 'step': 6551, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:20:57.502333', 'step': 6551, 'epoch': 1} {'type': 'loss', 'content': 0.16563840210437775, 'timestamp': '2025-10-01 04:20:57.525987', 'step': 6552, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:20:57.566507', 'step': 6552, 'epoch': 1} {'type': 'loss', 'content': 0.18427811563014984, 'timestamp': '2025-10-01 04:20:57.568539', 'step': 6553, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:20:57.607159', 'step': 6553, 'epoch': 1} {'type': 'loss', 'content': 0.045215558260679245, 'timestamp': '2025-10-01 04:20:57.609159', 'step': 6554, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:57.641685', 'step': 6554, 'epoch': 1} {'type': 'loss', 'content': 0.16166578233242035, 'timestamp': '2025-10-01 04:20:57.645029', 'step': 6555, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:20:57.686470', 'step': 6555, 'epoch': 1} {'type': 'loss', 'content': 0.19455531239509583, 'timestamp': '2025-10-01 04:20:57.710210', 'step': 6556, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:20:57.750075', 'step': 6556, 'epoch': 1} {'type': 'loss', 'content': 0.12293167412281036, 'timestamp': '2025-10-01 04:20:57.752166', 'step': 6557, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:20:57.799103', 'step': 6557, 'epoch': 1} {'type': 'loss', 'content': 0.13624951243400574, 'timestamp': '2025-10-01 04:20:57.803293', 'step': 6558, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:20:57.840333', 'step': 6558, 'epoch': 1} {'type': 'loss', 'content': 0.22045664489269257, 'timestamp': '2025-10-01 04:20:57.842790', 'step': 6559, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:57.881586', 'step': 6559, 'epoch': 1} {'type': 'loss', 'content': 0.15748991072177887, 'timestamp': '2025-10-01 04:20:57.905430', 'step': 6560, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:57.948358', 'step': 6560, 'epoch': 1} {'type': 'loss', 'content': 0.14045675098896027, 'timestamp': '2025-10-01 04:20:57.950440', 'step': 6561, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:57.984308', 'step': 6561, 'epoch': 1} {'type': 'loss', 'content': 0.180076465010643, 'timestamp': '2025-10-01 04:20:57.986114', 'step': 6562, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:58.019833', 'step': 6562, 'epoch': 1} {'type': 'loss', 'content': 0.2118893265724182, 'timestamp': '2025-10-01 04:20:58.023908', 'step': 6563, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:20:58.057147', 'step': 6563, 'epoch': 1} {'type': 'loss', 'content': 0.23444300889968872, 'timestamp': '2025-10-01 04:20:58.080915', 'step': 6564, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:20:58.113361', 'step': 6564, 'epoch': 1} {'type': 'loss', 'content': 0.11544865369796753, 'timestamp': '2025-10-01 04:20:58.115399', 'step': 6565, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:58.152832', 'step': 6565, 'epoch': 1} {'type': 'loss', 'content': 0.1434834897518158, 'timestamp': '2025-10-01 04:20:58.155495', 'step': 6566, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:58.187943', 'step': 6566, 'epoch': 1} {'type': 'loss', 'content': 0.13958007097244263, 'timestamp': '2025-10-01 04:20:58.190100', 'step': 6567, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:58.226933', 'step': 6567, 'epoch': 1} {'type': 'loss', 'content': 0.1379932314157486, 'timestamp': '2025-10-01 04:20:58.250356', 'step': 6568, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:58.283856', 'step': 6568, 'epoch': 1} {'type': 'loss', 'content': 0.1368180513381958, 'timestamp': '2025-10-01 04:20:58.286082', 'step': 6569, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:20:58.323714', 'step': 6569, 'epoch': 1} {'type': 'loss', 'content': 0.26615050435066223, 'timestamp': '2025-10-01 04:20:58.326492', 'step': 6570, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:20:58.359991', 'step': 6570, 'epoch': 1} {'type': 'loss', 'content': 0.1495119333267212, 'timestamp': '2025-10-01 04:20:58.362393', 'step': 6571, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:20:58.394055', 'step': 6571, 'epoch': 1} {'type': 'loss', 'content': 0.1446322649717331, 'timestamp': '2025-10-01 04:20:58.417543', 'step': 6572, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:58.449337', 'step': 6572, 'epoch': 1} {'type': 'loss', 'content': 0.10067711770534515, 'timestamp': '2025-10-01 04:20:58.451284', 'step': 6573, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:20:58.483691', 'step': 6573, 'epoch': 1} {'type': 'loss', 'content': 0.12312539666891098, 'timestamp': '2025-10-01 04:20:58.485714', 'step': 6574, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:58.518453', 'step': 6574, 'epoch': 1} {'type': 'loss', 'content': 0.19884207844734192, 'timestamp': '2025-10-01 04:20:58.520568', 'step': 6575, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:20:58.554859', 'step': 6575, 'epoch': 1} {'type': 'loss', 'content': 0.10674472898244858, 'timestamp': '2025-10-01 04:20:58.578508', 'step': 6576, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:20:58.612693', 'step': 6576, 'epoch': 1} {'type': 'loss', 'content': 0.10426449775695801, 'timestamp': '2025-10-01 04:20:58.614687', 'step': 6577, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:58.648017', 'step': 6577, 'epoch': 1} {'type': 'loss', 'content': 0.14163541793823242, 'timestamp': '2025-10-01 04:20:58.650115', 'step': 6578, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:20:58.682248', 'step': 6578, 'epoch': 1} {'type': 'loss', 'content': 0.13117389380931854, 'timestamp': '2025-10-01 04:20:58.684388', 'step': 6579, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:20:58.722682', 'step': 6579, 'epoch': 1} {'type': 'loss', 'content': 0.20234568417072296, 'timestamp': '2025-10-01 04:20:58.746143', 'step': 6580, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:58.784260', 'step': 6580, 'epoch': 1} {'type': 'loss', 'content': 0.1639636754989624, 'timestamp': '2025-10-01 04:20:58.789033', 'step': 6581, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:20:58.825994', 'step': 6581, 'epoch': 1} {'type': 'loss', 'content': 0.12627717852592468, 'timestamp': '2025-10-01 04:20:58.827948', 'step': 6582, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:20:58.861232', 'step': 6582, 'epoch': 1} {'type': 'loss', 'content': 0.11810927093029022, 'timestamp': '2025-10-01 04:20:58.864973', 'step': 6583, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:20:58.903976', 'step': 6583, 'epoch': 1} {'type': 'loss', 'content': 0.1803472638130188, 'timestamp': '2025-10-01 04:20:58.928581', 'step': 6584, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:20:58.960126', 'step': 6584, 'epoch': 1} {'type': 'loss', 'content': 0.23730990290641785, 'timestamp': '2025-10-01 04:20:58.974340', 'step': 6585, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:59.009368', 'step': 6585, 'epoch': 1} {'type': 'loss', 'content': 0.08263938874006271, 'timestamp': '2025-10-01 04:20:59.011621', 'step': 6586, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:20:59.051829', 'step': 6586, 'epoch': 1} {'type': 'loss', 'content': 0.1307959258556366, 'timestamp': '2025-10-01 04:20:59.054038', 'step': 6587, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:59.090503', 'step': 6587, 'epoch': 1} {'type': 'loss', 'content': 0.18069617450237274, 'timestamp': '2025-10-01 04:20:59.115051', 'step': 6588, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:20:59.148823', 'step': 6588, 'epoch': 1} {'type': 'loss', 'content': 0.1529795080423355, 'timestamp': '2025-10-01 04:20:59.150734', 'step': 6589, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:20:59.187969', 'step': 6589, 'epoch': 1} {'type': 'loss', 'content': 0.1280200332403183, 'timestamp': '2025-10-01 04:20:59.190351', 'step': 6590, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:20:59.228985', 'step': 6590, 'epoch': 1} {'type': 'loss', 'content': 0.08963442593812943, 'timestamp': '2025-10-01 04:20:59.231073', 'step': 6591, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:20:59.274713', 'step': 6591, 'epoch': 1} {'type': 'loss', 'content': 0.23372875154018402, 'timestamp': '2025-10-01 04:20:59.298167', 'step': 6592, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:59.331589', 'step': 6592, 'epoch': 1} {'type': 'loss', 'content': 0.10324500501155853, 'timestamp': '2025-10-01 04:20:59.333809', 'step': 6593, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:59.381684', 'step': 6593, 'epoch': 1} {'type': 'loss', 'content': 0.16424758732318878, 'timestamp': '2025-10-01 04:20:59.383753', 'step': 6594, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:20:59.426426', 'step': 6594, 'epoch': 1} {'type': 'loss', 'content': 0.11578072607517242, 'timestamp': '2025-10-01 04:20:59.428321', 'step': 6595, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:20:59.469421', 'step': 6595, 'epoch': 1} {'type': 'loss', 'content': 0.12511542439460754, 'timestamp': '2025-10-01 04:20:59.492922', 'step': 6596, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:20:59.525964', 'step': 6596, 'epoch': 1} {'type': 'loss', 'content': 0.1884123831987381, 'timestamp': '2025-10-01 04:20:59.528018', 'step': 6597, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:20:59.559403', 'step': 6597, 'epoch': 1} {'type': 'loss', 'content': 0.08320685476064682, 'timestamp': '2025-10-01 04:20:59.561586', 'step': 6598, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:20:59.592641', 'step': 6598, 'epoch': 1} {'type': 'loss', 'content': 0.09332649409770966, 'timestamp': '2025-10-01 04:20:59.595667', 'step': 6599, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:20:59.626807', 'step': 6599, 'epoch': 1} {'type': 'loss', 'content': 0.09272850304841995, 'timestamp': '2025-10-01 04:20:59.650723', 'step': 6600, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:20:59.681977', 'step': 6600, 'epoch': 1} {'type': 'loss', 'content': 0.10043380409479141, 'timestamp': '2025-10-01 04:20:59.684402', 'step': 6601, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:59.715294', 'step': 6601, 'epoch': 1} {'type': 'loss', 'content': 0.1036728024482727, 'timestamp': '2025-10-01 04:20:59.717630', 'step': 6602, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:20:59.753863', 'step': 6602, 'epoch': 1} {'type': 'loss', 'content': 0.1697389781475067, 'timestamp': '2025-10-01 04:20:59.756360', 'step': 6603, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:59.787459', 'step': 6603, 'epoch': 1} {'type': 'loss', 'content': 0.27608147263526917, 'timestamp': '2025-10-01 04:20:59.812784', 'step': 6604, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:20:59.843806', 'step': 6604, 'epoch': 1} {'type': 'loss', 'content': 0.08583643287420273, 'timestamp': '2025-10-01 04:20:59.846321', 'step': 6605, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:59.877811', 'step': 6605, 'epoch': 1} {'type': 'loss', 'content': 0.11714182049036026, 'timestamp': '2025-10-01 04:20:59.880360', 'step': 6606, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:20:59.919533', 'step': 6606, 'epoch': 1} {'type': 'loss', 'content': 0.10593114793300629, 'timestamp': '2025-10-01 04:20:59.922302', 'step': 6607, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:20:59.959357', 'step': 6607, 'epoch': 1} {'type': 'loss', 'content': 0.14166736602783203, 'timestamp': '2025-10-01 04:20:59.983134', 'step': 6608, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:00.026539', 'step': 6608, 'epoch': 1} {'type': 'loss', 'content': 0.17743952572345734, 'timestamp': '2025-10-01 04:21:00.029012', 'step': 6609, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:00.061548', 'step': 6609, 'epoch': 1} {'type': 'loss', 'content': 0.23144009709358215, 'timestamp': '2025-10-01 04:21:00.063648', 'step': 6610, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:00.105696', 'step': 6610, 'epoch': 1} {'type': 'loss', 'content': 0.16770122945308685, 'timestamp': '2025-10-01 04:21:00.108029', 'step': 6611, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:00.155269', 'step': 6611, 'epoch': 1} {'type': 'loss', 'content': 0.1373395323753357, 'timestamp': '2025-10-01 04:21:00.178944', 'step': 6612, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:00.211551', 'step': 6612, 'epoch': 1} {'type': 'loss', 'content': 0.15683843195438385, 'timestamp': '2025-10-01 04:21:00.213981', 'step': 6613, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:00.252205', 'step': 6613, 'epoch': 1} {'type': 'loss', 'content': 0.16802580654621124, 'timestamp': '2025-10-01 04:21:00.254581', 'step': 6614, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:00.288590', 'step': 6614, 'epoch': 1} {'type': 'loss', 'content': 0.08781681209802628, 'timestamp': '2025-10-01 04:21:00.291156', 'step': 6615, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:00.335817', 'step': 6615, 'epoch': 1} {'type': 'loss', 'content': 0.18010865151882172, 'timestamp': '2025-10-01 04:21:00.359468', 'step': 6616, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:00.393134', 'step': 6616, 'epoch': 1} {'type': 'loss', 'content': 0.10624994337558746, 'timestamp': '2025-10-01 04:21:00.395641', 'step': 6617, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:00.432503', 'step': 6617, 'epoch': 1} {'type': 'loss', 'content': 0.09914755076169968, 'timestamp': '2025-10-01 04:21:00.434679', 'step': 6618, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:00.475795', 'step': 6618, 'epoch': 1} {'type': 'loss', 'content': 0.10431032627820969, 'timestamp': '2025-10-01 04:21:00.478055', 'step': 6619, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:00.509320', 'step': 6619, 'epoch': 1} {'type': 'loss', 'content': 0.10369101911783218, 'timestamp': '2025-10-01 04:21:00.534774', 'step': 6620, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:00.565808', 'step': 6620, 'epoch': 1} {'type': 'loss', 'content': 0.1478147953748703, 'timestamp': '2025-10-01 04:21:00.567913', 'step': 6621, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:00.598343', 'step': 6621, 'epoch': 1} {'type': 'loss', 'content': 0.12508489191532135, 'timestamp': '2025-10-01 04:21:00.600924', 'step': 6622, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:00.636265', 'step': 6622, 'epoch': 1} {'type': 'loss', 'content': 0.15439072251319885, 'timestamp': '2025-10-01 04:21:00.638567', 'step': 6623, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:00.674691', 'step': 6623, 'epoch': 1} {'type': 'loss', 'content': 0.11500076949596405, 'timestamp': '2025-10-01 04:21:00.698470', 'step': 6624, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:00.737007', 'step': 6624, 'epoch': 1} {'type': 'loss', 'content': 0.1757306307554245, 'timestamp': '2025-10-01 04:21:00.739389', 'step': 6625, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:00.773858', 'step': 6625, 'epoch': 1} {'type': 'loss', 'content': 0.13923491537570953, 'timestamp': '2025-10-01 04:21:00.776271', 'step': 6626, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:00.813430', 'step': 6626, 'epoch': 1} {'type': 'loss', 'content': 0.08292045444250107, 'timestamp': '2025-10-01 04:21:00.815891', 'step': 6627, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:00.846925', 'step': 6627, 'epoch': 1} {'type': 'loss', 'content': 0.17343339323997498, 'timestamp': '2025-10-01 04:21:00.870758', 'step': 6628, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:00.903352', 'step': 6628, 'epoch': 1} {'type': 'loss', 'content': 0.20463162660598755, 'timestamp': '2025-10-01 04:21:00.905345', 'step': 6629, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:00.936221', 'step': 6629, 'epoch': 1} {'type': 'loss', 'content': 0.1207975447177887, 'timestamp': '2025-10-01 04:21:00.938383', 'step': 6630, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:00.970367', 'step': 6630, 'epoch': 1} {'type': 'loss', 'content': 0.2187403291463852, 'timestamp': '2025-10-01 04:21:00.972402', 'step': 6631, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:01.008006', 'step': 6631, 'epoch': 1} {'type': 'loss', 'content': 0.11668327450752258, 'timestamp': '2025-10-01 04:21:01.031592', 'step': 6632, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:01.062068', 'step': 6632, 'epoch': 1} {'type': 'loss', 'content': 0.13735859096050262, 'timestamp': '2025-10-01 04:21:01.063976', 'step': 6633, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:01.100506', 'step': 6633, 'epoch': 1} {'type': 'loss', 'content': 0.14677037298679352, 'timestamp': '2025-10-01 04:21:01.102509', 'step': 6634, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:01.135544', 'step': 6634, 'epoch': 1} {'type': 'loss', 'content': 0.2170693725347519, 'timestamp': '2025-10-01 04:21:01.137639', 'step': 6635, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:01.173298', 'step': 6635, 'epoch': 1} {'type': 'loss', 'content': 0.15519572794437408, 'timestamp': '2025-10-01 04:21:01.197176', 'step': 6636, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:01.231690', 'step': 6636, 'epoch': 1} {'type': 'loss', 'content': 0.04337659478187561, 'timestamp': '2025-10-01 04:21:01.234132', 'step': 6637, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:01.272656', 'step': 6637, 'epoch': 1} {'type': 'loss', 'content': 0.11634316295385361, 'timestamp': '2025-10-01 04:21:01.275081', 'step': 6638, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:01.312795', 'step': 6638, 'epoch': 1} {'type': 'loss', 'content': 0.15951049327850342, 'timestamp': '2025-10-01 04:21:01.315043', 'step': 6639, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:01.346378', 'step': 6639, 'epoch': 1} {'type': 'loss', 'content': 0.15377841889858246, 'timestamp': '2025-10-01 04:21:01.370018', 'step': 6640, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:01.403412', 'step': 6640, 'epoch': 1} {'type': 'loss', 'content': 0.2755124866962433, 'timestamp': '2025-10-01 04:21:01.405463', 'step': 6641, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:01.436689', 'step': 6641, 'epoch': 1} {'type': 'loss', 'content': 0.11986634135246277, 'timestamp': '2025-10-01 04:21:01.438888', 'step': 6642, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:21:01.473037', 'step': 6642, 'epoch': 1} {'type': 'loss', 'content': 0.23254869878292084, 'timestamp': '2025-10-01 04:21:01.477432', 'step': 6643, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:01.508997', 'step': 6643, 'epoch': 1} {'type': 'loss', 'content': 0.16251207888126373, 'timestamp': '2025-10-01 04:21:01.532485', 'step': 6644, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:01.563693', 'step': 6644, 'epoch': 1} {'type': 'loss', 'content': 0.09650208055973053, 'timestamp': '2025-10-01 04:21:01.565859', 'step': 6645, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:01.602857', 'step': 6645, 'epoch': 1} {'type': 'loss', 'content': 0.11317932605743408, 'timestamp': '2025-10-01 04:21:01.604777', 'step': 6646, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:21:01.639084', 'step': 6646, 'epoch': 1} {'type': 'loss', 'content': 0.11288350075483322, 'timestamp': '2025-10-01 04:21:01.641205', 'step': 6647, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:01.681177', 'step': 6647, 'epoch': 1} {'type': 'loss', 'content': 0.09395810961723328, 'timestamp': '2025-10-01 04:21:01.704611', 'step': 6648, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:21:01.745521', 'step': 6648, 'epoch': 1} {'type': 'loss', 'content': 0.2376440316438675, 'timestamp': '2025-10-01 04:21:01.747538', 'step': 6649, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:01.777981', 'step': 6649, 'epoch': 1} {'type': 'loss', 'content': 0.20759440958499908, 'timestamp': '2025-10-01 04:21:01.779964', 'step': 6650, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:01.819508', 'step': 6650, 'epoch': 1} {'type': 'loss', 'content': 0.09141063690185547, 'timestamp': '2025-10-01 04:21:01.821574', 'step': 6651, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:21:01.852285', 'step': 6651, 'epoch': 1} {'type': 'loss', 'content': 0.23211659491062164, 'timestamp': '2025-10-01 04:21:01.875826', 'step': 6652, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:01.907390', 'step': 6652, 'epoch': 1} {'type': 'loss', 'content': 0.22849057614803314, 'timestamp': '2025-10-01 04:21:01.909390', 'step': 6653, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:01.949947', 'step': 6653, 'epoch': 1} {'type': 'loss', 'content': 0.11936371773481369, 'timestamp': '2025-10-01 04:21:01.952066', 'step': 6654, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:01.992948', 'step': 6654, 'epoch': 1} {'type': 'loss', 'content': 0.17434608936309814, 'timestamp': '2025-10-01 04:21:01.994974', 'step': 6655, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:02.029875', 'step': 6655, 'epoch': 1} {'type': 'loss', 'content': 0.1633782833814621, 'timestamp': '2025-10-01 04:21:02.053594', 'step': 6656, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:02.091851', 'step': 6656, 'epoch': 1} {'type': 'loss', 'content': 0.13671046495437622, 'timestamp': '2025-10-01 04:21:02.094364', 'step': 6657, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:21:02.126634', 'step': 6657, 'epoch': 1} {'type': 'loss', 'content': 0.1989763081073761, 'timestamp': '2025-10-01 04:21:02.128777', 'step': 6658, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:02.166650', 'step': 6658, 'epoch': 1} {'type': 'loss', 'content': 0.10414788126945496, 'timestamp': '2025-10-01 04:21:02.168779', 'step': 6659, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:02.208166', 'step': 6659, 'epoch': 1} {'type': 'loss', 'content': 0.19168543815612793, 'timestamp': '2025-10-01 04:21:02.231642', 'step': 6660, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:02.265936', 'step': 6660, 'epoch': 1} {'type': 'loss', 'content': 0.046153903007507324, 'timestamp': '2025-10-01 04:21:02.267904', 'step': 6661, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:21:02.301050', 'step': 6661, 'epoch': 1} {'type': 'loss', 'content': 0.11225679516792297, 'timestamp': '2025-10-01 04:21:02.303352', 'step': 6662, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:02.338070', 'step': 6662, 'epoch': 1} {'type': 'loss', 'content': 0.10522439330816269, 'timestamp': '2025-10-01 04:21:02.340208', 'step': 6663, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:02.371705', 'step': 6663, 'epoch': 1} {'type': 'loss', 'content': 0.1597621738910675, 'timestamp': '2025-10-01 04:21:02.395281', 'step': 6664, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:02.433891', 'step': 6664, 'epoch': 1} {'type': 'loss', 'content': 0.14882294833660126, 'timestamp': '2025-10-01 04:21:02.435634', 'step': 6665, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:02.470801', 'step': 6665, 'epoch': 1} {'type': 'loss', 'content': 0.0948978066444397, 'timestamp': '2025-10-01 04:21:02.472891', 'step': 6666, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:02.506799', 'step': 6666, 'epoch': 1} {'type': 'loss', 'content': 0.3407028019428253, 'timestamp': '2025-10-01 04:21:02.508872', 'step': 6667, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:02.546929', 'step': 6667, 'epoch': 1} {'type': 'loss', 'content': 0.1621343344449997, 'timestamp': '2025-10-01 04:21:02.570514', 'step': 6668, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:02.607911', 'step': 6668, 'epoch': 1} {'type': 'loss', 'content': 0.1303420066833496, 'timestamp': '2025-10-01 04:21:02.610003', 'step': 6669, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:02.641514', 'step': 6669, 'epoch': 1} {'type': 'loss', 'content': 0.09291546791791916, 'timestamp': '2025-10-01 04:21:02.643591', 'step': 6670, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:02.678407', 'step': 6670, 'epoch': 1} {'type': 'loss', 'content': 0.043848246335983276, 'timestamp': '2025-10-01 04:21:02.681667', 'step': 6671, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:02.715854', 'step': 6671, 'epoch': 1} {'type': 'loss', 'content': 0.1158641129732132, 'timestamp': '2025-10-01 04:21:02.739952', 'step': 6672, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:02.770798', 'step': 6672, 'epoch': 1} {'type': 'loss', 'content': 0.20996762812137604, 'timestamp': '2025-10-01 04:21:02.772888', 'step': 6673, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:02.803693', 'step': 6673, 'epoch': 1} {'type': 'loss', 'content': 0.19340306520462036, 'timestamp': '2025-10-01 04:21:02.805756', 'step': 6674, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:02.837954', 'step': 6674, 'epoch': 1} {'type': 'loss', 'content': 0.20929068326950073, 'timestamp': '2025-10-01 04:21:02.840005', 'step': 6675, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:02.876853', 'step': 6675, 'epoch': 1} {'type': 'loss', 'content': 0.08399249613285065, 'timestamp': '2025-10-01 04:21:02.900859', 'step': 6676, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:21:02.938997', 'step': 6676, 'epoch': 1} {'type': 'loss', 'content': 0.11242224276065826, 'timestamp': '2025-10-01 04:21:02.941054', 'step': 6677, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:02.975117', 'step': 6677, 'epoch': 1} {'type': 'loss', 'content': 0.14879341423511505, 'timestamp': '2025-10-01 04:21:02.977162', 'step': 6678, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:03.009171', 'step': 6678, 'epoch': 1} {'type': 'loss', 'content': 0.22361627221107483, 'timestamp': '2025-10-01 04:21:03.011142', 'step': 6679, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:21:03.042951', 'step': 6679, 'epoch': 1} {'type': 'loss', 'content': 0.18569453060626984, 'timestamp': '2025-10-01 04:21:03.066390', 'step': 6680, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:03.102441', 'step': 6680, 'epoch': 1} {'type': 'loss', 'content': 0.08068017661571503, 'timestamp': '2025-10-01 04:21:03.105136', 'step': 6681, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:03.142437', 'step': 6681, 'epoch': 1} {'type': 'loss', 'content': 0.1636190414428711, 'timestamp': '2025-10-01 04:21:03.144884', 'step': 6682, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:03.179880', 'step': 6682, 'epoch': 1} {'type': 'loss', 'content': 0.2150086909532547, 'timestamp': '2025-10-01 04:21:03.181915', 'step': 6683, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:03.215620', 'step': 6683, 'epoch': 1} {'type': 'loss', 'content': 0.17054513096809387, 'timestamp': '2025-10-01 04:21:03.239133', 'step': 6684, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:21:03.270567', 'step': 6684, 'epoch': 1} {'type': 'loss', 'content': 0.18209247291088104, 'timestamp': '2025-10-01 04:21:03.272702', 'step': 6685, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:03.307695', 'step': 6685, 'epoch': 1} {'type': 'loss', 'content': 0.09958690404891968, 'timestamp': '2025-10-01 04:21:03.309817', 'step': 6686, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:03.343969', 'step': 6686, 'epoch': 1} {'type': 'loss', 'content': 0.13436736166477203, 'timestamp': '2025-10-01 04:21:03.346145', 'step': 6687, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:03.377072', 'step': 6687, 'epoch': 1} {'type': 'loss', 'content': 0.1741311103105545, 'timestamp': '2025-10-01 04:21:03.406170', 'step': 6688, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:03.441132', 'step': 6688, 'epoch': 1} {'type': 'loss', 'content': 0.23352442681789398, 'timestamp': '2025-10-01 04:21:03.443498', 'step': 6689, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:21:03.474645', 'step': 6689, 'epoch': 1} {'type': 'loss', 'content': 0.20673304796218872, 'timestamp': '2025-10-01 04:21:03.477189', 'step': 6690, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:03.516064', 'step': 6690, 'epoch': 1} {'type': 'loss', 'content': 0.1597772240638733, 'timestamp': '2025-10-01 04:21:03.518033', 'step': 6691, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:03.557633', 'step': 6691, 'epoch': 1} {'type': 'loss', 'content': 0.12596939504146576, 'timestamp': '2025-10-01 04:21:03.581203', 'step': 6692, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:03.612383', 'step': 6692, 'epoch': 1} {'type': 'loss', 'content': 0.17588713765144348, 'timestamp': '2025-10-01 04:21:03.614569', 'step': 6693, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:03.653890', 'step': 6693, 'epoch': 1} {'type': 'loss', 'content': 0.10919630527496338, 'timestamp': '2025-10-01 04:21:03.655981', 'step': 6694, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:03.694921', 'step': 6694, 'epoch': 1} {'type': 'loss', 'content': 0.12874963879585266, 'timestamp': '2025-10-01 04:21:03.696990', 'step': 6695, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:21:03.730205', 'step': 6695, 'epoch': 1} {'type': 'loss', 'content': 0.12829580903053284, 'timestamp': '2025-10-01 04:21:03.753706', 'step': 6696, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:03.790268', 'step': 6696, 'epoch': 1} {'type': 'loss', 'content': 0.1702253371477127, 'timestamp': '2025-10-01 04:21:03.792088', 'step': 6697, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:03.838043', 'step': 6697, 'epoch': 1} {'type': 'loss', 'content': 0.18900154531002045, 'timestamp': '2025-10-01 04:21:03.839985', 'step': 6698, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:21:03.876122', 'step': 6698, 'epoch': 1} {'type': 'loss', 'content': 0.17716273665428162, 'timestamp': '2025-10-01 04:21:03.878385', 'step': 6699, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:03.912942', 'step': 6699, 'epoch': 1} {'type': 'loss', 'content': 0.11021780222654343, 'timestamp': '2025-10-01 04:21:03.936604', 'step': 6700, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:03.968398', 'step': 6700, 'epoch': 1} {'type': 'loss', 'content': 0.2066427618265152, 'timestamp': '2025-10-01 04:21:03.970601', 'step': 6701, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:04.008698', 'step': 6701, 'epoch': 1} {'type': 'loss', 'content': 0.2278759926557541, 'timestamp': '2025-10-01 04:21:04.010577', 'step': 6702, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:21:04.046797', 'step': 6702, 'epoch': 1} {'type': 'loss', 'content': 0.1318173110485077, 'timestamp': '2025-10-01 04:21:04.049087', 'step': 6703, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:04.084516', 'step': 6703, 'epoch': 1} {'type': 'loss', 'content': 0.1430729776620865, 'timestamp': '2025-10-01 04:21:04.108548', 'step': 6704, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:04.144723', 'step': 6704, 'epoch': 1} {'type': 'loss', 'content': 0.17992003262043, 'timestamp': '2025-10-01 04:21:04.146766', 'step': 6705, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:21:04.186859', 'step': 6705, 'epoch': 1} {'type': 'loss', 'content': 0.17359322309494019, 'timestamp': '2025-10-01 04:21:04.191281', 'step': 6706, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:04.228061', 'step': 6706, 'epoch': 1} {'type': 'loss', 'content': 0.14682939648628235, 'timestamp': '2025-10-01 04:21:04.230260', 'step': 6707, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:04.266385', 'step': 6707, 'epoch': 1} {'type': 'loss', 'content': 0.13212671875953674, 'timestamp': '2025-10-01 04:21:04.289905', 'step': 6708, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:04.324023', 'step': 6708, 'epoch': 1} {'type': 'loss', 'content': 0.09380091726779938, 'timestamp': '2025-10-01 04:21:04.325953', 'step': 6709, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:04.357178', 'step': 6709, 'epoch': 1} {'type': 'loss', 'content': 0.2290334403514862, 'timestamp': '2025-10-01 04:21:04.359218', 'step': 6710, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:21:04.394815', 'step': 6710, 'epoch': 1} {'type': 'loss', 'content': 0.24497130513191223, 'timestamp': '2025-10-01 04:21:04.396812', 'step': 6711, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:04.432148', 'step': 6711, 'epoch': 1} {'type': 'loss', 'content': 0.17024098336696625, 'timestamp': '2025-10-01 04:21:04.455843', 'step': 6712, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:04.489883', 'step': 6712, 'epoch': 1} {'type': 'loss', 'content': 0.1073995903134346, 'timestamp': '2025-10-01 04:21:04.491920', 'step': 6713, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:04.523631', 'step': 6713, 'epoch': 1} {'type': 'loss', 'content': 0.17706844210624695, 'timestamp': '2025-10-01 04:21:04.525505', 'step': 6714, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:21:04.560834', 'step': 6714, 'epoch': 1} {'type': 'loss', 'content': 0.09210043400526047, 'timestamp': '2025-10-01 04:21:04.563151', 'step': 6715, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:04.599553', 'step': 6715, 'epoch': 1} {'type': 'loss', 'content': 0.15462319552898407, 'timestamp': '2025-10-01 04:21:04.624084', 'step': 6716, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-10-01 04:21:04.660402', 'step': 6716, 'epoch': 1} {'type': 'loss', 'content': 0.17082518339157104, 'timestamp': '2025-10-01 04:21:04.665329', 'step': 6717, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:04.701007', 'step': 6717, 'epoch': 1} {'type': 'loss', 'content': 0.1373123973608017, 'timestamp': '2025-10-01 04:21:04.703082', 'step': 6718, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:21:04.741659', 'step': 6718, 'epoch': 1} {'type': 'loss', 'content': 0.14951322972774506, 'timestamp': '2025-10-01 04:21:04.744019', 'step': 6719, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:04.776368', 'step': 6719, 'epoch': 1} {'type': 'loss', 'content': 0.15272152423858643, 'timestamp': '2025-10-01 04:21:04.799836', 'step': 6720, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:04.832107', 'step': 6720, 'epoch': 1} {'type': 'loss', 'content': 0.1962253898382187, 'timestamp': '2025-10-01 04:21:04.834266', 'step': 6721, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:04.865586', 'step': 6721, 'epoch': 1} {'type': 'loss', 'content': 0.29694148898124695, 'timestamp': '2025-10-01 04:21:04.867637', 'step': 6722, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:04.898548', 'step': 6722, 'epoch': 1} {'type': 'loss', 'content': 0.19422776997089386, 'timestamp': '2025-10-01 04:21:04.900483', 'step': 6723, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:04.934555', 'step': 6723, 'epoch': 1} {'type': 'loss', 'content': 0.10540730506181717, 'timestamp': '2025-10-01 04:21:04.958258', 'step': 6724, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:04.997254', 'step': 6724, 'epoch': 1} {'type': 'loss', 'content': 0.10287220776081085, 'timestamp': '2025-10-01 04:21:04.999226', 'step': 6725, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:05.035059', 'step': 6725, 'epoch': 1} {'type': 'loss', 'content': 0.09571515768766403, 'timestamp': '2025-10-01 04:21:05.037063', 'step': 6726, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:05.073137', 'step': 6726, 'epoch': 1} {'type': 'loss', 'content': 0.10290875285863876, 'timestamp': '2025-10-01 04:21:05.075377', 'step': 6727, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:05.110610', 'step': 6727, 'epoch': 1} {'type': 'loss', 'content': 0.09527119249105453, 'timestamp': '2025-10-01 04:21:05.134155', 'step': 6728, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:05.169211', 'step': 6728, 'epoch': 1} {'type': 'loss', 'content': 0.16873471438884735, 'timestamp': '2025-10-01 04:21:05.171305', 'step': 6729, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:05.209146', 'step': 6729, 'epoch': 1} {'type': 'loss', 'content': 0.21529190242290497, 'timestamp': '2025-10-01 04:21:05.211246', 'step': 6730, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:05.251915', 'step': 6730, 'epoch': 1} {'type': 'loss', 'content': 0.21774719655513763, 'timestamp': '2025-10-01 04:21:05.253866', 'step': 6731, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:05.284820', 'step': 6731, 'epoch': 1} {'type': 'loss', 'content': 0.22716961801052094, 'timestamp': '2025-10-01 04:21:05.309566', 'step': 6732, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:05.340338', 'step': 6732, 'epoch': 1} {'type': 'loss', 'content': 0.10844503343105316, 'timestamp': '2025-10-01 04:21:05.342242', 'step': 6733, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:05.373601', 'step': 6733, 'epoch': 1} {'type': 'loss', 'content': 0.18421761691570282, 'timestamp': '2025-10-01 04:21:05.377777', 'step': 6734, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:05.415688', 'step': 6734, 'epoch': 1} {'type': 'loss', 'content': 0.0671662911772728, 'timestamp': '2025-10-01 04:21:05.417757', 'step': 6735, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:05.449108', 'step': 6735, 'epoch': 1} {'type': 'loss', 'content': 0.15830813348293304, 'timestamp': '2025-10-01 04:21:05.472623', 'step': 6736, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:05.506836', 'step': 6736, 'epoch': 1} {'type': 'loss', 'content': 0.17520655691623688, 'timestamp': '2025-10-01 04:21:05.509108', 'step': 6737, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:05.540771', 'step': 6737, 'epoch': 1} {'type': 'loss', 'content': 0.15171197056770325, 'timestamp': '2025-10-01 04:21:05.542927', 'step': 6738, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:05.574067', 'step': 6738, 'epoch': 1} {'type': 'loss', 'content': 0.16626620292663574, 'timestamp': '2025-10-01 04:21:05.576124', 'step': 6739, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:05.608256', 'step': 6739, 'epoch': 1} {'type': 'loss', 'content': 0.1296047568321228, 'timestamp': '2025-10-01 04:21:05.631699', 'step': 6740, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:05.661683', 'step': 6740, 'epoch': 1} {'type': 'loss', 'content': 0.18765616416931152, 'timestamp': '2025-10-01 04:21:05.663730', 'step': 6741, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:21:05.694628', 'step': 6741, 'epoch': 1} {'type': 'loss', 'content': 0.13928812742233276, 'timestamp': '2025-10-01 04:21:05.697396', 'step': 6742, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:05.737402', 'step': 6742, 'epoch': 1} {'type': 'loss', 'content': 0.14062628149986267, 'timestamp': '2025-10-01 04:21:05.739331', 'step': 6743, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:05.773207', 'step': 6743, 'epoch': 1} {'type': 'loss', 'content': 0.12776075303554535, 'timestamp': '2025-10-01 04:21:05.796776', 'step': 6744, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:05.830631', 'step': 6744, 'epoch': 1} {'type': 'loss', 'content': 0.14001749455928802, 'timestamp': '2025-10-01 04:21:05.832772', 'step': 6745, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:05.865718', 'step': 6745, 'epoch': 1} {'type': 'loss', 'content': 0.1271551251411438, 'timestamp': '2025-10-01 04:21:05.867949', 'step': 6746, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:05.899039', 'step': 6746, 'epoch': 1} {'type': 'loss', 'content': 0.17166748642921448, 'timestamp': '2025-10-01 04:21:05.900930', 'step': 6747, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:05.932397', 'step': 6747, 'epoch': 1} {'type': 'loss', 'content': 0.19299207627773285, 'timestamp': '2025-10-01 04:21:05.956111', 'step': 6748, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:05.993310', 'step': 6748, 'epoch': 1} {'type': 'loss', 'content': 0.13448484241962433, 'timestamp': '2025-10-01 04:21:05.995306', 'step': 6749, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:06.025899', 'step': 6749, 'epoch': 1} {'type': 'loss', 'content': 0.24567972123622894, 'timestamp': '2025-10-01 04:21:06.027975', 'step': 6750, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:06.061738', 'step': 6750, 'epoch': 1} {'type': 'loss', 'content': 0.09205780178308487, 'timestamp': '2025-10-01 04:21:06.063850', 'step': 6751, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:06.095402', 'step': 6751, 'epoch': 1} {'type': 'loss', 'content': 0.18249119818210602, 'timestamp': '2025-10-01 04:21:06.118986', 'step': 6752, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:06.155404', 'step': 6752, 'epoch': 1} {'type': 'loss', 'content': 0.16769926249980927, 'timestamp': '2025-10-01 04:21:06.157300', 'step': 6753, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:06.196955', 'step': 6753, 'epoch': 1} {'type': 'loss', 'content': 0.13055528700351715, 'timestamp': '2025-10-01 04:21:06.199054', 'step': 6754, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:06.239437', 'step': 6754, 'epoch': 1} {'type': 'loss', 'content': 0.14736995100975037, 'timestamp': '2025-10-01 04:21:06.241543', 'step': 6755, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:21:06.272418', 'step': 6755, 'epoch': 1} {'type': 'loss', 'content': 0.15121091902256012, 'timestamp': '2025-10-01 04:21:06.295826', 'step': 6756, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:06.327084', 'step': 6756, 'epoch': 1} {'type': 'loss', 'content': 0.12100851535797119, 'timestamp': '2025-10-01 04:21:06.329403', 'step': 6757, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:06.362735', 'step': 6757, 'epoch': 1} {'type': 'loss', 'content': 0.048022639006376266, 'timestamp': '2025-10-01 04:21:06.364987', 'step': 6758, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:06.401531', 'step': 6758, 'epoch': 1} {'type': 'loss', 'content': 0.13187387585639954, 'timestamp': '2025-10-01 04:21:06.403761', 'step': 6759, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:06.437738', 'step': 6759, 'epoch': 1} {'type': 'loss', 'content': 0.11220839619636536, 'timestamp': '2025-10-01 04:21:06.461523', 'step': 6760, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:06.495653', 'step': 6760, 'epoch': 1} {'type': 'loss', 'content': 0.14779603481292725, 'timestamp': '2025-10-01 04:21:06.497774', 'step': 6761, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:06.534233', 'step': 6761, 'epoch': 1} {'type': 'loss', 'content': 0.1699313074350357, 'timestamp': '2025-10-01 04:21:06.536480', 'step': 6762, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:21:06.567427', 'step': 6762, 'epoch': 1} {'type': 'loss', 'content': 0.1222376674413681, 'timestamp': '2025-10-01 04:21:06.569883', 'step': 6763, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:06.602424', 'step': 6763, 'epoch': 1} {'type': 'loss', 'content': 0.09502188861370087, 'timestamp': '2025-10-01 04:21:06.627021', 'step': 6764, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:06.656916', 'step': 6764, 'epoch': 1} {'type': 'loss', 'content': 0.13724172115325928, 'timestamp': '2025-10-01 04:21:06.658923', 'step': 6765, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:06.690484', 'step': 6765, 'epoch': 1} {'type': 'loss', 'content': 0.20306609570980072, 'timestamp': '2025-10-01 04:21:06.693098', 'step': 6766, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:06.727150', 'step': 6766, 'epoch': 1} {'type': 'loss', 'content': 0.1587676852941513, 'timestamp': '2025-10-01 04:21:06.729115', 'step': 6767, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:06.761540', 'step': 6767, 'epoch': 1} {'type': 'loss', 'content': 0.19436825811862946, 'timestamp': '2025-10-01 04:21:06.785176', 'step': 6768, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:06.819299', 'step': 6768, 'epoch': 1} {'type': 'loss', 'content': 0.10884765535593033, 'timestamp': '2025-10-01 04:21:06.823591', 'step': 6769, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:06.854145', 'step': 6769, 'epoch': 1} {'type': 'loss', 'content': 0.22992800176143646, 'timestamp': '2025-10-01 04:21:06.856162', 'step': 6770, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:21:06.888160', 'step': 6770, 'epoch': 1} {'type': 'loss', 'content': 0.09967270493507385, 'timestamp': '2025-10-01 04:21:06.890511', 'step': 6771, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:06.928691', 'step': 6771, 'epoch': 1} {'type': 'loss', 'content': 0.1746135950088501, 'timestamp': '2025-10-01 04:21:06.952213', 'step': 6772, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:21:06.985763', 'step': 6772, 'epoch': 1} {'type': 'loss', 'content': 0.10558809340000153, 'timestamp': '2025-10-01 04:21:06.987913', 'step': 6773, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:07.021273', 'step': 6773, 'epoch': 1} {'type': 'loss', 'content': 0.19911585748195648, 'timestamp': '2025-10-01 04:21:07.023644', 'step': 6774, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:07.058111', 'step': 6774, 'epoch': 1} {'type': 'loss', 'content': 0.059539686888456345, 'timestamp': '2025-10-01 04:21:07.063431', 'step': 6775, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:07.095873', 'step': 6775, 'epoch': 1} {'type': 'loss', 'content': 0.10718828439712524, 'timestamp': '2025-10-01 04:21:07.119592', 'step': 6776, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:07.152363', 'step': 6776, 'epoch': 1} {'type': 'loss', 'content': 0.08592533320188522, 'timestamp': '2025-10-01 04:21:07.154406', 'step': 6777, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:07.192215', 'step': 6777, 'epoch': 1} {'type': 'loss', 'content': 0.18010690808296204, 'timestamp': '2025-10-01 04:21:07.194659', 'step': 6778, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:07.225974', 'step': 6778, 'epoch': 1} {'type': 'loss', 'content': 0.14809949696063995, 'timestamp': '2025-10-01 04:21:07.227983', 'step': 6779, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:07.261258', 'step': 6779, 'epoch': 1} {'type': 'loss', 'content': 0.19611652195453644, 'timestamp': '2025-10-01 04:21:07.285965', 'step': 6780, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:07.319804', 'step': 6780, 'epoch': 1} {'type': 'loss', 'content': 0.07712951302528381, 'timestamp': '2025-10-01 04:21:07.321988', 'step': 6781, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:07.353686', 'step': 6781, 'epoch': 1} {'type': 'loss', 'content': 0.09367803484201431, 'timestamp': '2025-10-01 04:21:07.355576', 'step': 6782, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:07.388452', 'step': 6782, 'epoch': 1} {'type': 'loss', 'content': 0.20028355717658997, 'timestamp': '2025-10-01 04:21:07.391031', 'step': 6783, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:07.424910', 'step': 6783, 'epoch': 1} {'type': 'loss', 'content': 0.13666003942489624, 'timestamp': '2025-10-01 04:21:07.448903', 'step': 6784, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:07.479466', 'step': 6784, 'epoch': 1} {'type': 'loss', 'content': 0.15520042181015015, 'timestamp': '2025-10-01 04:21:07.482197', 'step': 6785, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:07.513499', 'step': 6785, 'epoch': 1} {'type': 'loss', 'content': 0.10597753524780273, 'timestamp': '2025-10-01 04:21:07.518876', 'step': 6786, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:07.550032', 'step': 6786, 'epoch': 1} {'type': 'loss', 'content': 0.2543776333332062, 'timestamp': '2025-10-01 04:21:07.552135', 'step': 6787, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:07.584137', 'step': 6787, 'epoch': 1} {'type': 'loss', 'content': 0.11320922523736954, 'timestamp': '2025-10-01 04:21:07.607881', 'step': 6788, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:07.639441', 'step': 6788, 'epoch': 1} {'type': 'loss', 'content': 0.1319999396800995, 'timestamp': '2025-10-01 04:21:07.641615', 'step': 6789, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:07.671858', 'step': 6789, 'epoch': 1} {'type': 'loss', 'content': 0.20676563680171967, 'timestamp': '2025-10-01 04:21:07.674070', 'step': 6790, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:07.704141', 'step': 6790, 'epoch': 1} {'type': 'loss', 'content': 0.12346550077199936, 'timestamp': '2025-10-01 04:21:07.706516', 'step': 6791, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:07.737500', 'step': 6791, 'epoch': 1} {'type': 'loss', 'content': 0.15695048868656158, 'timestamp': '2025-10-01 04:21:07.761630', 'step': 6792, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:21:07.798986', 'step': 6792, 'epoch': 1} {'type': 'loss', 'content': 0.10162030905485153, 'timestamp': '2025-10-01 04:21:07.807068', 'step': 6793, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:21:07.838621', 'step': 6793, 'epoch': 1} {'type': 'loss', 'content': 0.162831112742424, 'timestamp': '2025-10-01 04:21:07.840604', 'step': 6794, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:07.873200', 'step': 6794, 'epoch': 1} {'type': 'loss', 'content': 0.07247112691402435, 'timestamp': '2025-10-01 04:21:07.875581', 'step': 6795, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:07.911891', 'step': 6795, 'epoch': 1} {'type': 'loss', 'content': 0.11882081627845764, 'timestamp': '2025-10-01 04:21:07.936134', 'step': 6796, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:07.967625', 'step': 6796, 'epoch': 1} {'type': 'loss', 'content': 0.21542024612426758, 'timestamp': '2025-10-01 04:21:07.969783', 'step': 6797, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:08.005121', 'step': 6797, 'epoch': 1} {'type': 'loss', 'content': 0.18798001110553741, 'timestamp': '2025-10-01 04:21:08.007193', 'step': 6798, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:08.043867', 'step': 6798, 'epoch': 1} {'type': 'loss', 'content': 0.11812335252761841, 'timestamp': '2025-10-01 04:21:08.047028', 'step': 6799, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:08.080511', 'step': 6799, 'epoch': 1} {'type': 'loss', 'content': 0.21793897449970245, 'timestamp': '2025-10-01 04:21:08.104236', 'step': 6800, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:08.137193', 'step': 6800, 'epoch': 1} {'type': 'loss', 'content': 0.14037209749221802, 'timestamp': '2025-10-01 04:21:08.139251', 'step': 6801, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:08.172275', 'step': 6801, 'epoch': 1} {'type': 'loss', 'content': 0.2044525444507599, 'timestamp': '2025-10-01 04:21:08.174602', 'step': 6802, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:08.206851', 'step': 6802, 'epoch': 1} {'type': 'loss', 'content': 0.19817255437374115, 'timestamp': '2025-10-01 04:21:08.209176', 'step': 6803, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:08.239256', 'step': 6803, 'epoch': 1} {'type': 'loss', 'content': 0.11729637533426285, 'timestamp': '2025-10-01 04:21:08.262816', 'step': 6804, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:08.295225', 'step': 6804, 'epoch': 1} {'type': 'loss', 'content': 0.19916725158691406, 'timestamp': '2025-10-01 04:21:08.297332', 'step': 6805, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:08.342898', 'step': 6805, 'epoch': 1} {'type': 'loss', 'content': 0.11640207469463348, 'timestamp': '2025-10-01 04:21:08.346087', 'step': 6806, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:08.378685', 'step': 6806, 'epoch': 1} {'type': 'loss', 'content': 0.07578852772712708, 'timestamp': '2025-10-01 04:21:08.380569', 'step': 6807, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:08.412720', 'step': 6807, 'epoch': 1} {'type': 'loss', 'content': 0.13665653765201569, 'timestamp': '2025-10-01 04:21:08.436120', 'step': 6808, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:08.472033', 'step': 6808, 'epoch': 1} {'type': 'loss', 'content': 0.1292337328195572, 'timestamp': '2025-10-01 04:21:08.474095', 'step': 6809, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:08.506162', 'step': 6809, 'epoch': 1} {'type': 'loss', 'content': 0.1946096420288086, 'timestamp': '2025-10-01 04:21:08.508085', 'step': 6810, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:08.537688', 'step': 6810, 'epoch': 1} {'type': 'loss', 'content': 0.12002959102392197, 'timestamp': '2025-10-01 04:21:08.539858', 'step': 6811, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:08.573014', 'step': 6811, 'epoch': 1} {'type': 'loss', 'content': 0.17660902440547943, 'timestamp': '2025-10-01 04:21:08.596823', 'step': 6812, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:08.629624', 'step': 6812, 'epoch': 1} {'type': 'loss', 'content': 0.09998445957899094, 'timestamp': '2025-10-01 04:21:08.631564', 'step': 6813, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:08.663375', 'step': 6813, 'epoch': 1} {'type': 'loss', 'content': 0.11027206480503082, 'timestamp': '2025-10-01 04:21:08.665394', 'step': 6814, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:08.700602', 'step': 6814, 'epoch': 1} {'type': 'loss', 'content': 0.14562174677848816, 'timestamp': '2025-10-01 04:21:08.702705', 'step': 6815, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:08.736610', 'step': 6815, 'epoch': 1} {'type': 'loss', 'content': 0.10016805678606033, 'timestamp': '2025-10-01 04:21:08.760200', 'step': 6816, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:08.791562', 'step': 6816, 'epoch': 1} {'type': 'loss', 'content': 0.14907105267047882, 'timestamp': '2025-10-01 04:21:08.793847', 'step': 6817, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:08.825011', 'step': 6817, 'epoch': 1} {'type': 'loss', 'content': 0.17735637724399567, 'timestamp': '2025-10-01 04:21:08.827090', 'step': 6818, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:08.858798', 'step': 6818, 'epoch': 1} {'type': 'loss', 'content': 0.20332704484462738, 'timestamp': '2025-10-01 04:21:08.860889', 'step': 6819, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:08.893213', 'step': 6819, 'epoch': 1} {'type': 'loss', 'content': 0.09840860217809677, 'timestamp': '2025-10-01 04:21:08.916784', 'step': 6820, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:08.950535', 'step': 6820, 'epoch': 1} {'type': 'loss', 'content': 0.16795198619365692, 'timestamp': '2025-10-01 04:21:08.955358', 'step': 6821, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:08.986032', 'step': 6821, 'epoch': 1} {'type': 'loss', 'content': 0.11428116261959076, 'timestamp': '2025-10-01 04:21:08.988052', 'step': 6822, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:21:09.019069', 'step': 6822, 'epoch': 1} {'type': 'loss', 'content': 0.11496836692094803, 'timestamp': '2025-10-01 04:21:09.022042', 'step': 6823, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:21:09.054428', 'step': 6823, 'epoch': 1} {'type': 'loss', 'content': 0.16486665606498718, 'timestamp': '2025-10-01 04:21:09.077930', 'step': 6824, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:09.109611', 'step': 6824, 'epoch': 1} {'type': 'loss', 'content': 0.1552681028842926, 'timestamp': '2025-10-01 04:21:09.111858', 'step': 6825, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:09.143026', 'step': 6825, 'epoch': 1} {'type': 'loss', 'content': 0.19922778010368347, 'timestamp': '2025-10-01 04:21:09.145828', 'step': 6826, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:09.177654', 'step': 6826, 'epoch': 1} {'type': 'loss', 'content': 0.19683785736560822, 'timestamp': '2025-10-01 04:21:09.179744', 'step': 6827, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:21:09.210529', 'step': 6827, 'epoch': 1} {'type': 'loss', 'content': 0.09323881566524506, 'timestamp': '2025-10-01 04:21:09.234001', 'step': 6828, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:09.264760', 'step': 6828, 'epoch': 1} {'type': 'loss', 'content': 0.11510007083415985, 'timestamp': '2025-10-01 04:21:09.270475', 'step': 6829, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:21:09.301304', 'step': 6829, 'epoch': 1} {'type': 'loss', 'content': 0.10207292437553406, 'timestamp': '2025-10-01 04:21:09.304219', 'step': 6830, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:09.336104', 'step': 6830, 'epoch': 1} {'type': 'loss', 'content': 0.22942619025707245, 'timestamp': '2025-10-01 04:21:09.338351', 'step': 6831, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:09.369465', 'step': 6831, 'epoch': 1} {'type': 'loss', 'content': 0.09660270065069199, 'timestamp': '2025-10-01 04:21:09.393182', 'step': 6832, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:09.423793', 'step': 6832, 'epoch': 1} {'type': 'loss', 'content': 0.17681719362735748, 'timestamp': '2025-10-01 04:21:09.426304', 'step': 6833, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:21:09.456344', 'step': 6833, 'epoch': 1} {'type': 'loss', 'content': 0.16535243391990662, 'timestamp': '2025-10-01 04:21:09.460683', 'step': 6834, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:09.492723', 'step': 6834, 'epoch': 1} {'type': 'loss', 'content': 0.10126163810491562, 'timestamp': '2025-10-01 04:21:09.494908', 'step': 6835, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:09.525009', 'step': 6835, 'epoch': 1} {'type': 'loss', 'content': 0.13881857693195343, 'timestamp': '2025-10-01 04:21:09.548569', 'step': 6836, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:21:09.578579', 'step': 6836, 'epoch': 1} {'type': 'loss', 'content': 0.192677840590477, 'timestamp': '2025-10-01 04:21:09.580866', 'step': 6837, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:09.611889', 'step': 6837, 'epoch': 1} {'type': 'loss', 'content': 0.11464745551347733, 'timestamp': '2025-10-01 04:21:09.613946', 'step': 6838, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:21:09.647173', 'step': 6838, 'epoch': 1} {'type': 'loss', 'content': 0.15936268866062164, 'timestamp': '2025-10-01 04:21:09.649439', 'step': 6839, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:09.681196', 'step': 6839, 'epoch': 1} {'type': 'loss', 'content': 0.1329970806837082, 'timestamp': '2025-10-01 04:21:09.704774', 'step': 6840, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:21:09.737225', 'step': 6840, 'epoch': 1} {'type': 'loss', 'content': 0.1218235045671463, 'timestamp': '2025-10-01 04:21:09.739339', 'step': 6841, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:09.773340', 'step': 6841, 'epoch': 1} {'type': 'loss', 'content': 0.10816199332475662, 'timestamp': '2025-10-01 04:21:09.777770', 'step': 6842, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:09.811029', 'step': 6842, 'epoch': 1} {'type': 'loss', 'content': 0.13794057071208954, 'timestamp': '2025-10-01 04:21:09.813381', 'step': 6843, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:09.843485', 'step': 6843, 'epoch': 1} {'type': 'loss', 'content': 0.13182932138442993, 'timestamp': '2025-10-01 04:21:09.866961', 'step': 6844, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:09.896956', 'step': 6844, 'epoch': 1} {'type': 'loss', 'content': 0.1540171504020691, 'timestamp': '2025-10-01 04:21:09.898933', 'step': 6845, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:09.929422', 'step': 6845, 'epoch': 1} {'type': 'loss', 'content': 0.10749229043722153, 'timestamp': '2025-10-01 04:21:09.931489', 'step': 6846, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:09.962321', 'step': 6846, 'epoch': 1} {'type': 'loss', 'content': 0.11314921826124191, 'timestamp': '2025-10-01 04:21:09.964540', 'step': 6847, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:09.994279', 'step': 6847, 'epoch': 1} {'type': 'loss', 'content': 0.13105355203151703, 'timestamp': '2025-10-01 04:21:10.017768', 'step': 6848, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:10.047705', 'step': 6848, 'epoch': 1} {'type': 'loss', 'content': 0.12529300153255463, 'timestamp': '2025-10-01 04:21:10.049743', 'step': 6849, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:10.079686', 'step': 6849, 'epoch': 1} {'type': 'loss', 'content': 0.08577161282300949, 'timestamp': '2025-10-01 04:21:10.081698', 'step': 6850, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:10.112498', 'step': 6850, 'epoch': 1} {'type': 'loss', 'content': 0.12068837136030197, 'timestamp': '2025-10-01 04:21:10.114557', 'step': 6851, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:10.144260', 'step': 6851, 'epoch': 1} {'type': 'loss', 'content': 0.1096087098121643, 'timestamp': '2025-10-01 04:21:10.168230', 'step': 6852, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:10.200634', 'step': 6852, 'epoch': 1} {'type': 'loss', 'content': 0.11541883647441864, 'timestamp': '2025-10-01 04:21:10.203574', 'step': 6853, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:21:10.236445', 'step': 6853, 'epoch': 1} {'type': 'loss', 'content': 0.19455234706401825, 'timestamp': '2025-10-01 04:21:10.238609', 'step': 6854, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:10.268380', 'step': 6854, 'epoch': 1} {'type': 'loss', 'content': 0.1987731009721756, 'timestamp': '2025-10-01 04:21:10.270278', 'step': 6855, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:10.301056', 'step': 6855, 'epoch': 1} {'type': 'loss', 'content': 0.1865401268005371, 'timestamp': '2025-10-01 04:21:10.324547', 'step': 6856, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:10.355657', 'step': 6856, 'epoch': 1} {'type': 'loss', 'content': 0.09331799298524857, 'timestamp': '2025-10-01 04:21:10.358117', 'step': 6857, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:10.388710', 'step': 6857, 'epoch': 1} {'type': 'loss', 'content': 0.1068585216999054, 'timestamp': '2025-10-01 04:21:10.391079', 'step': 6858, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:21:10.424211', 'step': 6858, 'epoch': 1} {'type': 'loss', 'content': 0.12202916294336319, 'timestamp': '2025-10-01 04:21:10.426208', 'step': 6859, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:10.457251', 'step': 6859, 'epoch': 1} {'type': 'loss', 'content': 0.09949582815170288, 'timestamp': '2025-10-01 04:21:10.481540', 'step': 6860, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:10.512101', 'step': 6860, 'epoch': 1} {'type': 'loss', 'content': 0.15156544744968414, 'timestamp': '2025-10-01 04:21:10.514247', 'step': 6861, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:10.545402', 'step': 6861, 'epoch': 1} {'type': 'loss', 'content': 0.1464867740869522, 'timestamp': '2025-10-01 04:21:10.547426', 'step': 6862, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:10.577570', 'step': 6862, 'epoch': 1} {'type': 'loss', 'content': 0.2711246609687805, 'timestamp': '2025-10-01 04:21:10.579787', 'step': 6863, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:10.609794', 'step': 6863, 'epoch': 1} {'type': 'loss', 'content': 0.17475928366184235, 'timestamp': '2025-10-01 04:21:10.633424', 'step': 6864, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:10.665320', 'step': 6864, 'epoch': 1} {'type': 'loss', 'content': 0.09057168662548065, 'timestamp': '2025-10-01 04:21:10.667394', 'step': 6865, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:10.698126', 'step': 6865, 'epoch': 1} {'type': 'loss', 'content': 0.10047021508216858, 'timestamp': '2025-10-01 04:21:10.700327', 'step': 6866, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:10.737853', 'step': 6866, 'epoch': 1} {'type': 'loss', 'content': 0.1312083750963211, 'timestamp': '2025-10-01 04:21:10.740252', 'step': 6867, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:10.771610', 'step': 6867, 'epoch': 1} {'type': 'loss', 'content': 0.11819792538881302, 'timestamp': '2025-10-01 04:21:10.795059', 'step': 6868, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:10.825589', 'step': 6868, 'epoch': 1} {'type': 'loss', 'content': 0.20725278556346893, 'timestamp': '2025-10-01 04:21:10.827567', 'step': 6869, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:10.857683', 'step': 6869, 'epoch': 1} {'type': 'loss', 'content': 0.12195385247468948, 'timestamp': '2025-10-01 04:21:10.859733', 'step': 6870, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:10.890665', 'step': 6870, 'epoch': 1} {'type': 'loss', 'content': 0.18374723196029663, 'timestamp': '2025-10-01 04:21:10.893347', 'step': 6871, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:10.923396', 'step': 6871, 'epoch': 1} {'type': 'loss', 'content': 0.17599566280841827, 'timestamp': '2025-10-01 04:21:10.946945', 'step': 6872, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:10.976856', 'step': 6872, 'epoch': 1} {'type': 'loss', 'content': 0.18678291141986847, 'timestamp': '2025-10-01 04:21:10.979107', 'step': 6873, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:21:11.009878', 'step': 6873, 'epoch': 1} {'type': 'loss', 'content': 0.17077139019966125, 'timestamp': '2025-10-01 04:21:11.012208', 'step': 6874, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:11.042600', 'step': 6874, 'epoch': 1} {'type': 'loss', 'content': 0.24271686375141144, 'timestamp': '2025-10-01 04:21:11.049770', 'step': 6875, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:21:11.086324', 'step': 6875, 'epoch': 1} {'type': 'loss', 'content': 0.14122603833675385, 'timestamp': '2025-10-01 04:21:11.115262', 'step': 6876, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:11.149808', 'step': 6876, 'epoch': 1} {'type': 'loss', 'content': 0.12847968935966492, 'timestamp': '2025-10-01 04:21:11.152027', 'step': 6877, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:11.186053', 'step': 6877, 'epoch': 1} {'type': 'loss', 'content': 0.1543527990579605, 'timestamp': '2025-10-01 04:21:11.190153', 'step': 6878, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:11.227355', 'step': 6878, 'epoch': 1} {'type': 'loss', 'content': 0.16262342035770416, 'timestamp': '2025-10-01 04:21:11.229515', 'step': 6879, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:11.259633', 'step': 6879, 'epoch': 1} {'type': 'loss', 'content': 0.11969045549631119, 'timestamp': '2025-10-01 04:21:11.283425', 'step': 6880, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:21:11.323113', 'step': 6880, 'epoch': 1} {'type': 'loss', 'content': 0.12853103876113892, 'timestamp': '2025-10-01 04:21:11.326572', 'step': 6881, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:11.360293', 'step': 6881, 'epoch': 1} {'type': 'loss', 'content': 0.2933313250541687, 'timestamp': '2025-10-01 04:21:11.362355', 'step': 6882, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:11.392395', 'step': 6882, 'epoch': 1} {'type': 'loss', 'content': 0.12414519488811493, 'timestamp': '2025-10-01 04:21:11.394524', 'step': 6883, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:21:11.426334', 'step': 6883, 'epoch': 1} {'type': 'loss', 'content': 0.16005578637123108, 'timestamp': '2025-10-01 04:21:11.450175', 'step': 6884, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:11.505132', 'step': 6884, 'epoch': 1} {'type': 'loss', 'content': 0.15501032769680023, 'timestamp': '2025-10-01 04:21:11.508537', 'step': 6885, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:11.574771', 'step': 6885, 'epoch': 1} {'type': 'loss', 'content': 0.11345292627811432, 'timestamp': '2025-10-01 04:21:11.576756', 'step': 6886, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:11.629506', 'step': 6886, 'epoch': 1} {'type': 'loss', 'content': 0.12451362609863281, 'timestamp': '2025-10-01 04:21:11.632020', 'step': 6887, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:21:11.678522', 'step': 6887, 'epoch': 1} {'type': 'loss', 'content': 0.09496480971574783, 'timestamp': '2025-10-01 04:21:11.703917', 'step': 6888, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:11.751793', 'step': 6888, 'epoch': 1} {'type': 'loss', 'content': 0.1090736910700798, 'timestamp': '2025-10-01 04:21:11.754124', 'step': 6889, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:11.796940', 'step': 6889, 'epoch': 1} {'type': 'loss', 'content': 0.09825349599123001, 'timestamp': '2025-10-01 04:21:11.800797', 'step': 6890, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:11.870734', 'step': 6890, 'epoch': 1} {'type': 'loss', 'content': 0.14149539172649384, 'timestamp': '2025-10-01 04:21:11.873030', 'step': 6891, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:11.916579', 'step': 6891, 'epoch': 1} {'type': 'loss', 'content': 0.14217136800289154, 'timestamp': '2025-10-01 04:21:11.940041', 'step': 6892, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:12.001209', 'step': 6892, 'epoch': 1} {'type': 'loss', 'content': 0.21908903121948242, 'timestamp': '2025-10-01 04:21:12.006188', 'step': 6893, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:12.057311', 'step': 6893, 'epoch': 1} {'type': 'loss', 'content': 0.08701859414577484, 'timestamp': '2025-10-01 04:21:12.068998', 'step': 6894, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:12.123844', 'step': 6894, 'epoch': 1} {'type': 'loss', 'content': 0.15551023185253143, 'timestamp': '2025-10-01 04:21:12.125858', 'step': 6895, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:12.176986', 'step': 6895, 'epoch': 1} {'type': 'loss', 'content': 0.17813894152641296, 'timestamp': '2025-10-01 04:21:12.201023', 'step': 6896, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:12.255315', 'step': 6896, 'epoch': 1} {'type': 'loss', 'content': 0.12042748928070068, 'timestamp': '2025-10-01 04:21:12.257624', 'step': 6897, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:12.309417', 'step': 6897, 'epoch': 1} {'type': 'loss', 'content': 0.14951105415821075, 'timestamp': '2025-10-01 04:21:12.311352', 'step': 6898, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:12.379741', 'step': 6898, 'epoch': 1} {'type': 'loss', 'content': 0.203620046377182, 'timestamp': '2025-10-01 04:21:12.381503', 'step': 6899, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:21:12.436291', 'step': 6899, 'epoch': 1} {'type': 'loss', 'content': 0.15367726981639862, 'timestamp': '2025-10-01 04:21:12.460609', 'step': 6900, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:21:12.518561', 'step': 6900, 'epoch': 1} {'type': 'loss', 'content': 0.14750459790229797, 'timestamp': '2025-10-01 04:21:12.524190', 'step': 6901, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:12.575789', 'step': 6901, 'epoch': 1} {'type': 'loss', 'content': 0.32453539967536926, 'timestamp': '2025-10-01 04:21:12.577759', 'step': 6902, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:21:12.623972', 'step': 6902, 'epoch': 1} {'type': 'loss', 'content': 0.20958346128463745, 'timestamp': '2025-10-01 04:21:12.627005', 'step': 6903, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:12.677995', 'step': 6903, 'epoch': 1} {'type': 'loss', 'content': 0.11899208277463913, 'timestamp': '2025-10-01 04:21:12.702174', 'step': 6904, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:12.758618', 'step': 6904, 'epoch': 1} {'type': 'loss', 'content': 0.2068018764257431, 'timestamp': '2025-10-01 04:21:12.760891', 'step': 6905, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:12.796063', 'step': 6905, 'epoch': 1} {'type': 'loss', 'content': 0.20164594054222107, 'timestamp': '2025-10-01 04:21:12.798093', 'step': 6906, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:21:12.828624', 'step': 6906, 'epoch': 1} {'type': 'loss', 'content': 0.15984050929546356, 'timestamp': '2025-10-01 04:21:12.831528', 'step': 6907, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:12.862034', 'step': 6907, 'epoch': 1} {'type': 'loss', 'content': 0.09359835833311081, 'timestamp': '2025-10-01 04:21:12.885410', 'step': 6908, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:12.918510', 'step': 6908, 'epoch': 1} {'type': 'loss', 'content': 0.14004671573638916, 'timestamp': '2025-10-01 04:21:12.920560', 'step': 6909, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:12.950349', 'step': 6909, 'epoch': 1} {'type': 'loss', 'content': 0.1780533492565155, 'timestamp': '2025-10-01 04:21:12.952580', 'step': 6910, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:12.982407', 'step': 6910, 'epoch': 1} {'type': 'loss', 'content': 0.13053655624389648, 'timestamp': '2025-10-01 04:21:12.984500', 'step': 6911, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:13.016738', 'step': 6911, 'epoch': 1} {'type': 'loss', 'content': 0.10704268515110016, 'timestamp': '2025-10-01 04:21:13.040014', 'step': 6912, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:13.069950', 'step': 6912, 'epoch': 1} {'type': 'loss', 'content': 0.15566545724868774, 'timestamp': '2025-10-01 04:21:13.072193', 'step': 6913, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:13.102040', 'step': 6913, 'epoch': 1} {'type': 'loss', 'content': 0.14701762795448303, 'timestamp': '2025-10-01 04:21:13.104097', 'step': 6914, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:13.134568', 'step': 6914, 'epoch': 1} {'type': 'loss', 'content': 0.18436484038829803, 'timestamp': '2025-10-01 04:21:13.136649', 'step': 6915, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:13.167297', 'step': 6915, 'epoch': 1} {'type': 'loss', 'content': 0.10772725194692612, 'timestamp': '2025-10-01 04:21:13.190871', 'step': 6916, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:21:13.222288', 'step': 6916, 'epoch': 1} {'type': 'loss', 'content': 0.10457679629325867, 'timestamp': '2025-10-01 04:21:13.224582', 'step': 6917, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:13.254570', 'step': 6917, 'epoch': 1} {'type': 'loss', 'content': 0.07200214266777039, 'timestamp': '2025-10-01 04:21:13.256995', 'step': 6918, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:21:13.289835', 'step': 6918, 'epoch': 1} {'type': 'loss', 'content': 0.1178271472454071, 'timestamp': '2025-10-01 04:21:13.292227', 'step': 6919, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:13.323329', 'step': 6919, 'epoch': 1} {'type': 'loss', 'content': 0.08491367101669312, 'timestamp': '2025-10-01 04:21:13.347097', 'step': 6920, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:13.377309', 'step': 6920, 'epoch': 1} {'type': 'loss', 'content': 0.09620588272809982, 'timestamp': '2025-10-01 04:21:13.379283', 'step': 6921, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:13.410593', 'step': 6921, 'epoch': 1} {'type': 'loss', 'content': 0.1282733529806137, 'timestamp': '2025-10-01 04:21:13.412745', 'step': 6922, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:13.443710', 'step': 6922, 'epoch': 1} {'type': 'loss', 'content': 0.1504247933626175, 'timestamp': '2025-10-01 04:21:13.445912', 'step': 6923, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:13.476108', 'step': 6923, 'epoch': 1} {'type': 'loss', 'content': 0.11777136474847794, 'timestamp': '2025-10-01 04:21:13.499656', 'step': 6924, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:13.529483', 'step': 6924, 'epoch': 1} {'type': 'loss', 'content': 0.13248635828495026, 'timestamp': '2025-10-01 04:21:13.531746', 'step': 6925, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:21:13.568370', 'step': 6925, 'epoch': 1} {'type': 'loss', 'content': 0.16426414251327515, 'timestamp': '2025-10-01 04:21:13.572690', 'step': 6926, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:13.603227', 'step': 6926, 'epoch': 1} {'type': 'loss', 'content': 0.1768939048051834, 'timestamp': '2025-10-01 04:21:13.605365', 'step': 6927, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:13.635806', 'step': 6927, 'epoch': 1} {'type': 'loss', 'content': 0.1645803600549698, 'timestamp': '2025-10-01 04:21:13.659494', 'step': 6928, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:13.698100', 'step': 6928, 'epoch': 1} {'type': 'loss', 'content': 0.15038517117500305, 'timestamp': '2025-10-01 04:21:13.700310', 'step': 6929, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:13.729945', 'step': 6929, 'epoch': 1} {'type': 'loss', 'content': 0.19582068920135498, 'timestamp': '2025-10-01 04:21:13.732036', 'step': 6930, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:13.761820', 'step': 6930, 'epoch': 1} {'type': 'loss', 'content': 0.10960603505373001, 'timestamp': '2025-10-01 04:21:13.763948', 'step': 6931, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:13.795691', 'step': 6931, 'epoch': 1} {'type': 'loss', 'content': 0.07685660570859909, 'timestamp': '2025-10-01 04:21:13.819381', 'step': 6932, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:21:13.849691', 'step': 6932, 'epoch': 1} {'type': 'loss', 'content': 0.08581308275461197, 'timestamp': '2025-10-01 04:21:13.853233', 'step': 6933, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:13.884987', 'step': 6933, 'epoch': 1} {'type': 'loss', 'content': 0.13233612477779388, 'timestamp': '2025-10-01 04:21:13.888397', 'step': 6934, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:13.922859', 'step': 6934, 'epoch': 1} {'type': 'loss', 'content': 0.10068642348051071, 'timestamp': '2025-10-01 04:21:13.925032', 'step': 6935, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:13.954273', 'step': 6935, 'epoch': 1} {'type': 'loss', 'content': 0.11961773037910461, 'timestamp': '2025-10-01 04:21:13.978058', 'step': 6936, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:14.009309', 'step': 6936, 'epoch': 1} {'type': 'loss', 'content': 0.12135802209377289, 'timestamp': '2025-10-01 04:21:14.011454', 'step': 6937, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:14.045084', 'step': 6937, 'epoch': 1} {'type': 'loss', 'content': 0.08602706342935562, 'timestamp': '2025-10-01 04:21:14.048061', 'step': 6938, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-10-01 04:21:14.079009', 'step': 6938, 'epoch': 1} {'type': 'loss', 'content': 0.14309334754943848, 'timestamp': '2025-10-01 04:21:14.083777', 'step': 6939, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:14.113945', 'step': 6939, 'epoch': 1} {'type': 'loss', 'content': 0.1566304862499237, 'timestamp': '2025-10-01 04:21:14.141386', 'step': 6940, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:14.182239', 'step': 6940, 'epoch': 1} {'type': 'loss', 'content': 0.17981195449829102, 'timestamp': '2025-10-01 04:21:14.184870', 'step': 6941, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:14.215065', 'step': 6941, 'epoch': 1} {'type': 'loss', 'content': 0.22830456495285034, 'timestamp': '2025-10-01 04:21:14.217809', 'step': 6942, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:14.253433', 'step': 6942, 'epoch': 1} {'type': 'loss', 'content': 0.14693574607372284, 'timestamp': '2025-10-01 04:21:14.255569', 'step': 6943, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:14.286125', 'step': 6943, 'epoch': 1} {'type': 'loss', 'content': 0.11223210394382477, 'timestamp': '2025-10-01 04:21:14.310616', 'step': 6944, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:14.341607', 'step': 6944, 'epoch': 1} {'type': 'loss', 'content': 0.16281791031360626, 'timestamp': '2025-10-01 04:21:14.343757', 'step': 6945, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:14.373629', 'step': 6945, 'epoch': 1} {'type': 'loss', 'content': 0.1189325824379921, 'timestamp': '2025-10-01 04:21:14.375758', 'step': 6946, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:14.406322', 'step': 6946, 'epoch': 1} {'type': 'loss', 'content': 0.1605730503797531, 'timestamp': '2025-10-01 04:21:14.408508', 'step': 6947, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:21:14.438822', 'step': 6947, 'epoch': 1} {'type': 'loss', 'content': 0.09192661195993423, 'timestamp': '2025-10-01 04:21:14.462579', 'step': 6948, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:14.499896', 'step': 6948, 'epoch': 1} {'type': 'loss', 'content': 0.14192785322666168, 'timestamp': '2025-10-01 04:21:14.502285', 'step': 6949, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:21:14.534095', 'step': 6949, 'epoch': 1} {'type': 'loss', 'content': 0.21286171674728394, 'timestamp': '2025-10-01 04:21:14.536300', 'step': 6950, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:14.568078', 'step': 6950, 'epoch': 1} {'type': 'loss', 'content': 0.13756759464740753, 'timestamp': '2025-10-01 04:21:14.570124', 'step': 6951, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:14.602134', 'step': 6951, 'epoch': 1} {'type': 'loss', 'content': 0.11613478511571884, 'timestamp': '2025-10-01 04:21:14.625787', 'step': 6952, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:14.655819', 'step': 6952, 'epoch': 1} {'type': 'loss', 'content': 0.1636512130498886, 'timestamp': '2025-10-01 04:21:14.658043', 'step': 6953, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:14.692454', 'step': 6953, 'epoch': 1} {'type': 'loss', 'content': 0.1175321564078331, 'timestamp': '2025-10-01 04:21:14.702993', 'step': 6954, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:21:14.734322', 'step': 6954, 'epoch': 1} {'type': 'loss', 'content': 0.22346927225589752, 'timestamp': '2025-10-01 04:21:14.736923', 'step': 6955, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:14.767322', 'step': 6955, 'epoch': 1} {'type': 'loss', 'content': 0.12779270112514496, 'timestamp': '2025-10-01 04:21:14.791165', 'step': 6956, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:14.823423', 'step': 6956, 'epoch': 1} {'type': 'loss', 'content': 0.26891839504241943, 'timestamp': '2025-10-01 04:21:14.825566', 'step': 6957, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:14.855422', 'step': 6957, 'epoch': 1} {'type': 'loss', 'content': 0.10127047449350357, 'timestamp': '2025-10-01 04:21:14.857722', 'step': 6958, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:14.892937', 'step': 6958, 'epoch': 1} {'type': 'loss', 'content': 0.19980508089065552, 'timestamp': '2025-10-01 04:21:14.895165', 'step': 6959, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:21:14.926010', 'step': 6959, 'epoch': 1} {'type': 'loss', 'content': 0.17414580285549164, 'timestamp': '2025-10-01 04:21:14.950003', 'step': 6960, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:14.979875', 'step': 6960, 'epoch': 1} {'type': 'loss', 'content': 0.14627818763256073, 'timestamp': '2025-10-01 04:21:14.982042', 'step': 6961, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:15.014894', 'step': 6961, 'epoch': 1} {'type': 'loss', 'content': 0.18514949083328247, 'timestamp': '2025-10-01 04:21:15.017906', 'step': 6962, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:15.049113', 'step': 6962, 'epoch': 1} {'type': 'loss', 'content': 0.13123369216918945, 'timestamp': '2025-10-01 04:21:15.051813', 'step': 6963, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:15.082135', 'step': 6963, 'epoch': 1} {'type': 'loss', 'content': 0.1800539344549179, 'timestamp': '2025-10-01 04:21:15.106227', 'step': 6964, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:15.136594', 'step': 6964, 'epoch': 1} {'type': 'loss', 'content': 0.13813701272010803, 'timestamp': '2025-10-01 04:21:15.139490', 'step': 6965, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:15.171420', 'step': 6965, 'epoch': 1} {'type': 'loss', 'content': 0.1205940917134285, 'timestamp': '2025-10-01 04:21:15.173637', 'step': 6966, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:15.204011', 'step': 6966, 'epoch': 1} {'type': 'loss', 'content': 0.1331702172756195, 'timestamp': '2025-10-01 04:21:15.206297', 'step': 6967, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:15.236320', 'step': 6967, 'epoch': 1} {'type': 'loss', 'content': 0.18532080948352814, 'timestamp': '2025-10-01 04:21:15.259778', 'step': 6968, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:15.290675', 'step': 6968, 'epoch': 1} {'type': 'loss', 'content': 0.054678775370121, 'timestamp': '2025-10-01 04:21:15.293153', 'step': 6969, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:15.325295', 'step': 6969, 'epoch': 1} {'type': 'loss', 'content': 0.13461662828922272, 'timestamp': '2025-10-01 04:21:15.334970', 'step': 6970, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:15.366180', 'step': 6970, 'epoch': 1} {'type': 'loss', 'content': 0.12912748754024506, 'timestamp': '2025-10-01 04:21:15.368875', 'step': 6971, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:15.404086', 'step': 6971, 'epoch': 1} {'type': 'loss', 'content': 0.13784357905387878, 'timestamp': '2025-10-01 04:21:15.427714', 'step': 6972, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:21:15.457645', 'step': 6972, 'epoch': 1} {'type': 'loss', 'content': 0.3146406412124634, 'timestamp': '2025-10-01 04:21:15.459986', 'step': 6973, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:15.489795', 'step': 6973, 'epoch': 1} {'type': 'loss', 'content': 0.16843312978744507, 'timestamp': '2025-10-01 04:21:15.492014', 'step': 6974, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:15.522828', 'step': 6974, 'epoch': 1} {'type': 'loss', 'content': 0.15991903841495514, 'timestamp': '2025-10-01 04:21:15.525101', 'step': 6975, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:15.557477', 'step': 6975, 'epoch': 1} {'type': 'loss', 'content': 0.1400798112154007, 'timestamp': '2025-10-01 04:21:15.581212', 'step': 6976, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:15.611190', 'step': 6976, 'epoch': 1} {'type': 'loss', 'content': 0.22340470552444458, 'timestamp': '2025-10-01 04:21:15.613500', 'step': 6977, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:15.644626', 'step': 6977, 'epoch': 1} {'type': 'loss', 'content': 0.2818201184272766, 'timestamp': '2025-10-01 04:21:15.646960', 'step': 6978, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:15.679014', 'step': 6978, 'epoch': 1} {'type': 'loss', 'content': 0.10915637761354446, 'timestamp': '2025-10-01 04:21:15.681092', 'step': 6979, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:15.713377', 'step': 6979, 'epoch': 1} {'type': 'loss', 'content': 0.15148425102233887, 'timestamp': '2025-10-01 04:21:15.738197', 'step': 6980, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:15.769836', 'step': 6980, 'epoch': 1} {'type': 'loss', 'content': 0.13693726062774658, 'timestamp': '2025-10-01 04:21:15.772269', 'step': 6981, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:15.803423', 'step': 6981, 'epoch': 1} {'type': 'loss', 'content': 0.08084908127784729, 'timestamp': '2025-10-01 04:21:15.811773', 'step': 6982, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:15.856418', 'step': 6982, 'epoch': 1} {'type': 'loss', 'content': 0.1851748675107956, 'timestamp': '2025-10-01 04:21:15.859155', 'step': 6983, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:15.889581', 'step': 6983, 'epoch': 1} {'type': 'loss', 'content': 0.10599776357412338, 'timestamp': '2025-10-01 04:21:15.914870', 'step': 6984, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:15.945741', 'step': 6984, 'epoch': 1} {'type': 'loss', 'content': 0.10396440327167511, 'timestamp': '2025-10-01 04:21:15.948220', 'step': 6985, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:21:15.979375', 'step': 6985, 'epoch': 1} {'type': 'loss', 'content': 0.2304055392742157, 'timestamp': '2025-10-01 04:21:15.981672', 'step': 6986, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:16.013472', 'step': 6986, 'epoch': 1} {'type': 'loss', 'content': 0.19623272120952606, 'timestamp': '2025-10-01 04:21:16.015736', 'step': 6987, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:16.046094', 'step': 6987, 'epoch': 1} {'type': 'loss', 'content': 0.11299194395542145, 'timestamp': '2025-10-01 04:21:16.070011', 'step': 6988, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:16.121852', 'step': 6988, 'epoch': 1} {'type': 'loss', 'content': 0.09810259938240051, 'timestamp': '2025-10-01 04:21:16.124928', 'step': 6989, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:16.164555', 'step': 6989, 'epoch': 1} {'type': 'loss', 'content': 0.18884868919849396, 'timestamp': '2025-10-01 04:21:16.166723', 'step': 6990, 'epoch': 1} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 949202279808}], 'timestamp': '2025-10-01 04:21:24.918008', 'step': 6990, 'epoch': 1} {'type': 'pplx', 'content': 8873.20894387637, 'timestamp': '2025-10-01 04:21:24.921506', 'step': 6990, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:24.951368', 'step': 6990, 'epoch': 1} {'type': 'loss', 'content': 0.1719396710395813, 'timestamp': '2025-10-01 04:21:24.953756', 'step': 6991, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:24.984805', 'step': 6991, 'epoch': 1} {'type': 'loss', 'content': 0.15762005746364594, 'timestamp': '2025-10-01 04:21:25.009160', 'step': 6992, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:25.041898', 'step': 6992, 'epoch': 1} {'type': 'loss', 'content': 0.17745733261108398, 'timestamp': '2025-10-01 04:21:25.043924', 'step': 6993, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:21:25.074529', 'step': 6993, 'epoch': 1} {'type': 'loss', 'content': 0.0987439900636673, 'timestamp': '2025-10-01 04:21:25.077062', 'step': 6994, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:25.108238', 'step': 6994, 'epoch': 1} {'type': 'loss', 'content': 0.2244470864534378, 'timestamp': '2025-10-01 04:21:25.110351', 'step': 6995, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:25.142507', 'step': 6995, 'epoch': 1} {'type': 'loss', 'content': 0.0720507949590683, 'timestamp': '2025-10-01 04:21:25.166202', 'step': 6996, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:21:25.197798', 'step': 6996, 'epoch': 1} {'type': 'loss', 'content': 0.1309286206960678, 'timestamp': '2025-10-01 04:21:25.200571', 'step': 6997, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:25.231159', 'step': 6997, 'epoch': 1} {'type': 'loss', 'content': 0.21459805965423584, 'timestamp': '2025-10-01 04:21:25.233515', 'step': 6998, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:25.264603', 'step': 6998, 'epoch': 1} {'type': 'loss', 'content': 0.10664142668247223, 'timestamp': '2025-10-01 04:21:25.266789', 'step': 6999, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:25.302524', 'step': 6999, 'epoch': 1} {'type': 'loss', 'content': 0.2365468442440033, 'timestamp': '2025-10-01 04:21:25.326386', 'step': 7000, 'epoch': 1} {'type': 'info', 'content': 'Checkpoint saved at step 7000', 'timestamp': '2025-10-01 04:21:31.030007', 'step': 7000, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:31.068267', 'step': 7000, 'epoch': 1} {'type': 'loss', 'content': 0.059299543499946594, 'timestamp': '2025-10-01 04:21:31.070425', 'step': 7001, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:31.104585', 'step': 7001, 'epoch': 1} {'type': 'loss', 'content': 0.16189810633659363, 'timestamp': '2025-10-01 04:21:31.106737', 'step': 7002, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:31.139031', 'step': 7002, 'epoch': 1} {'type': 'loss', 'content': 0.24033086001873016, 'timestamp': '2025-10-01 04:21:31.141199', 'step': 7003, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:31.172594', 'step': 7003, 'epoch': 1} {'type': 'loss', 'content': 0.15920841693878174, 'timestamp': '2025-10-01 04:21:31.196229', 'step': 7004, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:31.227682', 'step': 7004, 'epoch': 1} {'type': 'loss', 'content': 0.15043669939041138, 'timestamp': '2025-10-01 04:21:31.236795', 'step': 7005, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:31.267207', 'step': 7005, 'epoch': 1} {'type': 'loss', 'content': 0.13297167420387268, 'timestamp': '2025-10-01 04:21:31.269525', 'step': 7006, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:31.301627', 'step': 7006, 'epoch': 1} {'type': 'loss', 'content': 0.15135563910007477, 'timestamp': '2025-10-01 04:21:31.304663', 'step': 7007, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:31.336167', 'step': 7007, 'epoch': 1} {'type': 'loss', 'content': 0.11047995090484619, 'timestamp': '2025-10-01 04:21:31.360054', 'step': 7008, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:31.391518', 'step': 7008, 'epoch': 1} {'type': 'loss', 'content': 0.1598723828792572, 'timestamp': '2025-10-01 04:21:31.393669', 'step': 7009, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:31.424285', 'step': 7009, 'epoch': 1} {'type': 'loss', 'content': 0.1072835922241211, 'timestamp': '2025-10-01 04:21:31.426455', 'step': 7010, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:31.460683', 'step': 7010, 'epoch': 1} {'type': 'loss', 'content': 0.11501353234052658, 'timestamp': '2025-10-01 04:21:31.463183', 'step': 7011, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:31.495221', 'step': 7011, 'epoch': 1} {'type': 'loss', 'content': 0.17511293292045593, 'timestamp': '2025-10-01 04:21:31.519188', 'step': 7012, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:31.561713', 'step': 7012, 'epoch': 1} {'type': 'loss', 'content': 0.22652249038219452, 'timestamp': '2025-10-01 04:21:31.568045', 'step': 7013, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:31.602198', 'step': 7013, 'epoch': 1} {'type': 'loss', 'content': 0.2137308269739151, 'timestamp': '2025-10-01 04:21:31.604552', 'step': 7014, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:31.637055', 'step': 7014, 'epoch': 1} {'type': 'loss', 'content': 0.14943531155586243, 'timestamp': '2025-10-01 04:21:31.639499', 'step': 7015, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:31.671150', 'step': 7015, 'epoch': 1} {'type': 'loss', 'content': 0.2087239772081375, 'timestamp': '2025-10-01 04:21:31.695108', 'step': 7016, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:31.726083', 'step': 7016, 'epoch': 1} {'type': 'loss', 'content': 0.22153902053833008, 'timestamp': '2025-10-01 04:21:31.728456', 'step': 7017, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:31.760496', 'step': 7017, 'epoch': 1} {'type': 'loss', 'content': 0.21032080054283142, 'timestamp': '2025-10-01 04:21:31.763161', 'step': 7018, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:31.794569', 'step': 7018, 'epoch': 1} {'type': 'loss', 'content': 0.18998008966445923, 'timestamp': '2025-10-01 04:21:31.796951', 'step': 7019, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:31.827172', 'step': 7019, 'epoch': 1} {'type': 'loss', 'content': 0.11643289774656296, 'timestamp': '2025-10-01 04:21:31.850745', 'step': 7020, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:31.882812', 'step': 7020, 'epoch': 1} {'type': 'loss', 'content': 0.17113913595676422, 'timestamp': '2025-10-01 04:21:31.884942', 'step': 7021, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:31.918435', 'step': 7021, 'epoch': 1} {'type': 'loss', 'content': 0.1306522637605667, 'timestamp': '2025-10-01 04:21:31.920652', 'step': 7022, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:21:31.952510', 'step': 7022, 'epoch': 1} {'type': 'loss', 'content': 0.13499949872493744, 'timestamp': '2025-10-01 04:21:31.954664', 'step': 7023, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:31.987366', 'step': 7023, 'epoch': 1} {'type': 'loss', 'content': 0.09930497407913208, 'timestamp': '2025-10-01 04:21:32.011053', 'step': 7024, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:32.050234', 'step': 7024, 'epoch': 1} {'type': 'loss', 'content': 0.18402047455310822, 'timestamp': '2025-10-01 04:21:32.052592', 'step': 7025, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:32.082814', 'step': 7025, 'epoch': 1} {'type': 'loss', 'content': 0.28232046961784363, 'timestamp': '2025-10-01 04:21:32.085123', 'step': 7026, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:32.115631', 'step': 7026, 'epoch': 1} {'type': 'loss', 'content': 0.17132489383220673, 'timestamp': '2025-10-01 04:21:32.117511', 'step': 7027, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:32.148425', 'step': 7027, 'epoch': 1} {'type': 'loss', 'content': 0.14557337760925293, 'timestamp': '2025-10-01 04:21:32.172100', 'step': 7028, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:21:32.202868', 'step': 7028, 'epoch': 1} {'type': 'loss', 'content': 0.1079302504658699, 'timestamp': '2025-10-01 04:21:32.205105', 'step': 7029, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:32.235910', 'step': 7029, 'epoch': 1} {'type': 'loss', 'content': 0.1418563723564148, 'timestamp': '2025-10-01 04:21:32.238786', 'step': 7030, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:32.271993', 'step': 7030, 'epoch': 1} {'type': 'loss', 'content': 0.06587576121091843, 'timestamp': '2025-10-01 04:21:32.274154', 'step': 7031, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:21:32.305526', 'step': 7031, 'epoch': 1} {'type': 'loss', 'content': 0.09422125667333603, 'timestamp': '2025-10-01 04:21:32.329247', 'step': 7032, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:32.363192', 'step': 7032, 'epoch': 1} {'type': 'loss', 'content': 0.12569299340248108, 'timestamp': '2025-10-01 04:21:32.365407', 'step': 7033, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:32.396284', 'step': 7033, 'epoch': 1} {'type': 'loss', 'content': 0.1585516333580017, 'timestamp': '2025-10-01 04:21:32.398632', 'step': 7034, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:32.434587', 'step': 7034, 'epoch': 1} {'type': 'loss', 'content': 0.09334038197994232, 'timestamp': '2025-10-01 04:21:32.436855', 'step': 7035, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:32.467354', 'step': 7035, 'epoch': 1} {'type': 'loss', 'content': 0.11851447820663452, 'timestamp': '2025-10-01 04:21:32.490871', 'step': 7036, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:21:32.521529', 'step': 7036, 'epoch': 1} {'type': 'loss', 'content': 0.13786546885967255, 'timestamp': '2025-10-01 04:21:32.524057', 'step': 7037, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:32.555521', 'step': 7037, 'epoch': 1} {'type': 'loss', 'content': 0.11657184362411499, 'timestamp': '2025-10-01 04:21:32.557371', 'step': 7038, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:21:32.587982', 'step': 7038, 'epoch': 1} {'type': 'loss', 'content': 0.1278952956199646, 'timestamp': '2025-10-01 04:21:32.590199', 'step': 7039, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:32.620727', 'step': 7039, 'epoch': 1} {'type': 'loss', 'content': 0.17496974766254425, 'timestamp': '2025-10-01 04:21:32.644735', 'step': 7040, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:32.678312', 'step': 7040, 'epoch': 1} {'type': 'loss', 'content': 0.10698863863945007, 'timestamp': '2025-10-01 04:21:32.681120', 'step': 7041, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:32.711843', 'step': 7041, 'epoch': 1} {'type': 'loss', 'content': 0.22417688369750977, 'timestamp': '2025-10-01 04:21:32.714036', 'step': 7042, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:32.744366', 'step': 7042, 'epoch': 1} {'type': 'loss', 'content': 0.1493554264307022, 'timestamp': '2025-10-01 04:21:32.754106', 'step': 7043, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:32.785065', 'step': 7043, 'epoch': 1} {'type': 'loss', 'content': 0.14682674407958984, 'timestamp': '2025-10-01 04:21:32.808752', 'step': 7044, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:32.839193', 'step': 7044, 'epoch': 1} {'type': 'loss', 'content': 0.1278986632823944, 'timestamp': '2025-10-01 04:21:32.842030', 'step': 7045, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:32.872988', 'step': 7045, 'epoch': 1} {'type': 'loss', 'content': 0.10336332768201828, 'timestamp': '2025-10-01 04:21:32.875069', 'step': 7046, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:32.905231', 'step': 7046, 'epoch': 1} {'type': 'loss', 'content': 0.18749688565731049, 'timestamp': '2025-10-01 04:21:32.907334', 'step': 7047, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:32.937957', 'step': 7047, 'epoch': 1} {'type': 'loss', 'content': 0.19903267920017242, 'timestamp': '2025-10-01 04:21:32.962511', 'step': 7048, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:33.005499', 'step': 7048, 'epoch': 1} {'type': 'loss', 'content': 0.12803871929645538, 'timestamp': '2025-10-01 04:21:33.007478', 'step': 7049, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:33.039623', 'step': 7049, 'epoch': 1} {'type': 'loss', 'content': 0.1161046251654625, 'timestamp': '2025-10-01 04:21:33.041376', 'step': 7050, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:33.071372', 'step': 7050, 'epoch': 1} {'type': 'loss', 'content': 0.21022489666938782, 'timestamp': '2025-10-01 04:21:33.073520', 'step': 7051, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:21:33.105345', 'step': 7051, 'epoch': 1} {'type': 'loss', 'content': 0.20304912328720093, 'timestamp': '2025-10-01 04:21:33.129036', 'step': 7052, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:33.162796', 'step': 7052, 'epoch': 1} {'type': 'loss', 'content': 0.1473054438829422, 'timestamp': '2025-10-01 04:21:33.164577', 'step': 7053, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:21:33.195421', 'step': 7053, 'epoch': 1} {'type': 'loss', 'content': 0.16269069910049438, 'timestamp': '2025-10-01 04:21:33.198027', 'step': 7054, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:33.228570', 'step': 7054, 'epoch': 1} {'type': 'loss', 'content': 0.15884985029697418, 'timestamp': '2025-10-01 04:21:33.230583', 'step': 7055, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:33.263171', 'step': 7055, 'epoch': 1} {'type': 'loss', 'content': 0.08552879095077515, 'timestamp': '2025-10-01 04:21:33.286801', 'step': 7056, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:33.317325', 'step': 7056, 'epoch': 1} {'type': 'loss', 'content': 0.18674223124980927, 'timestamp': '2025-10-01 04:21:33.319450', 'step': 7057, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:21:33.350269', 'step': 7057, 'epoch': 1} {'type': 'loss', 'content': 0.1967919021844864, 'timestamp': '2025-10-01 04:21:33.352674', 'step': 7058, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:21:33.399530', 'step': 7058, 'epoch': 1} {'type': 'loss', 'content': 0.26992398500442505, 'timestamp': '2025-10-01 04:21:33.402380', 'step': 7059, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:21:33.433168', 'step': 7059, 'epoch': 1} {'type': 'loss', 'content': 0.3050582706928253, 'timestamp': '2025-10-01 04:21:33.465804', 'step': 7060, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:21:33.496957', 'step': 7060, 'epoch': 1} {'type': 'loss', 'content': 0.13203532993793488, 'timestamp': '2025-10-01 04:21:33.499499', 'step': 7061, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:33.530315', 'step': 7061, 'epoch': 1} {'type': 'loss', 'content': 0.28653842210769653, 'timestamp': '2025-10-01 04:21:33.532471', 'step': 7062, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:33.564818', 'step': 7062, 'epoch': 1} {'type': 'loss', 'content': 0.2290276437997818, 'timestamp': '2025-10-01 04:21:33.567204', 'step': 7063, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:21:33.599944', 'step': 7063, 'epoch': 1} {'type': 'loss', 'content': 0.2237977534532547, 'timestamp': '2025-10-01 04:21:33.623597', 'step': 7064, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:33.654809', 'step': 7064, 'epoch': 1} {'type': 'loss', 'content': 0.1549992710351944, 'timestamp': '2025-10-01 04:21:33.661256', 'step': 7065, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:33.693472', 'step': 7065, 'epoch': 1} {'type': 'loss', 'content': 0.20067201554775238, 'timestamp': '2025-10-01 04:21:33.695430', 'step': 7066, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:21:33.726101', 'step': 7066, 'epoch': 1} {'type': 'loss', 'content': 0.18407957255840302, 'timestamp': '2025-10-01 04:21:33.730177', 'step': 7067, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:33.761311', 'step': 7067, 'epoch': 1} {'type': 'loss', 'content': 0.10302847623825073, 'timestamp': '2025-10-01 04:21:33.784721', 'step': 7068, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:33.817464', 'step': 7068, 'epoch': 1} {'type': 'loss', 'content': 0.12550896406173706, 'timestamp': '2025-10-01 04:21:33.819173', 'step': 7069, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:33.854456', 'step': 7069, 'epoch': 1} {'type': 'loss', 'content': 0.12086784094572067, 'timestamp': '2025-10-01 04:21:33.856377', 'step': 7070, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:33.886623', 'step': 7070, 'epoch': 1} {'type': 'loss', 'content': 0.18044477701187134, 'timestamp': '2025-10-01 04:21:33.888567', 'step': 7071, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:33.924337', 'step': 7071, 'epoch': 1} {'type': 'loss', 'content': 0.16036567091941833, 'timestamp': '2025-10-01 04:21:33.948049', 'step': 7072, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:33.980300', 'step': 7072, 'epoch': 1} {'type': 'loss', 'content': 0.13947559893131256, 'timestamp': '2025-10-01 04:21:33.983096', 'step': 7073, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:34.013441', 'step': 7073, 'epoch': 1} {'type': 'loss', 'content': 0.14197216928005219, 'timestamp': '2025-10-01 04:21:34.015775', 'step': 7074, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:34.052014', 'step': 7074, 'epoch': 1} {'type': 'loss', 'content': 0.08940261602401733, 'timestamp': '2025-10-01 04:21:34.054229', 'step': 7075, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:34.085230', 'step': 7075, 'epoch': 1} {'type': 'loss', 'content': 0.09192696213722229, 'timestamp': '2025-10-01 04:21:34.108566', 'step': 7076, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:34.141443', 'step': 7076, 'epoch': 1} {'type': 'loss', 'content': 0.10424100607633591, 'timestamp': '2025-10-01 04:21:34.143503', 'step': 7077, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:21:34.175038', 'step': 7077, 'epoch': 1} {'type': 'loss', 'content': 0.15924526751041412, 'timestamp': '2025-10-01 04:21:34.176970', 'step': 7078, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:34.207476', 'step': 7078, 'epoch': 1} {'type': 'loss', 'content': 0.1682305932044983, 'timestamp': '2025-10-01 04:21:34.209443', 'step': 7079, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:34.241894', 'step': 7079, 'epoch': 1} {'type': 'loss', 'content': 0.10453102737665176, 'timestamp': '2025-10-01 04:21:34.265433', 'step': 7080, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:34.300787', 'step': 7080, 'epoch': 1} {'type': 'loss', 'content': 0.1451685130596161, 'timestamp': '2025-10-01 04:21:34.302501', 'step': 7081, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:34.332570', 'step': 7081, 'epoch': 1} {'type': 'loss', 'content': 0.1569506824016571, 'timestamp': '2025-10-01 04:21:34.334456', 'step': 7082, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:34.364120', 'step': 7082, 'epoch': 1} {'type': 'loss', 'content': 0.11348923295736313, 'timestamp': '2025-10-01 04:21:34.366358', 'step': 7083, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:34.397569', 'step': 7083, 'epoch': 1} {'type': 'loss', 'content': 0.15639425814151764, 'timestamp': '2025-10-01 04:21:34.420710', 'step': 7084, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:34.451188', 'step': 7084, 'epoch': 1} {'type': 'loss', 'content': 0.1966577172279358, 'timestamp': '2025-10-01 04:21:34.453454', 'step': 7085, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:34.484794', 'step': 7085, 'epoch': 1} {'type': 'loss', 'content': 0.15980541706085205, 'timestamp': '2025-10-01 04:21:34.487075', 'step': 7086, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:34.521539', 'step': 7086, 'epoch': 1} {'type': 'loss', 'content': 0.18651817739009857, 'timestamp': '2025-10-01 04:21:34.523837', 'step': 7087, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:34.554031', 'step': 7087, 'epoch': 1} {'type': 'loss', 'content': 0.13116373121738434, 'timestamp': '2025-10-01 04:21:34.578147', 'step': 7088, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:34.608463', 'step': 7088, 'epoch': 1} {'type': 'loss', 'content': 0.09896636754274368, 'timestamp': '2025-10-01 04:21:34.610413', 'step': 7089, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:34.641984', 'step': 7089, 'epoch': 1} {'type': 'loss', 'content': 0.0472237765789032, 'timestamp': '2025-10-01 04:21:34.645096', 'step': 7090, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:34.677002', 'step': 7090, 'epoch': 1} {'type': 'loss', 'content': 0.1579706072807312, 'timestamp': '2025-10-01 04:21:34.679092', 'step': 7091, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:34.712846', 'step': 7091, 'epoch': 1} {'type': 'loss', 'content': 0.11108623445034027, 'timestamp': '2025-10-01 04:21:34.736210', 'step': 7092, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:21:34.769983', 'step': 7092, 'epoch': 1} {'type': 'loss', 'content': 0.09730926901102066, 'timestamp': '2025-10-01 04:21:34.771883', 'step': 7093, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:34.802628', 'step': 7093, 'epoch': 1} {'type': 'loss', 'content': 0.10465021431446075, 'timestamp': '2025-10-01 04:21:34.804547', 'step': 7094, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:34.835387', 'step': 7094, 'epoch': 1} {'type': 'loss', 'content': 0.20839641988277435, 'timestamp': '2025-10-01 04:21:34.837337', 'step': 7095, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:21:34.870209', 'step': 7095, 'epoch': 1} {'type': 'loss', 'content': 0.15927591919898987, 'timestamp': '2025-10-01 04:21:34.893505', 'step': 7096, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:34.925031', 'step': 7096, 'epoch': 1} {'type': 'loss', 'content': 0.11935067176818848, 'timestamp': '2025-10-01 04:21:34.927130', 'step': 7097, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:34.958047', 'step': 7097, 'epoch': 1} {'type': 'loss', 'content': 0.1943449079990387, 'timestamp': '2025-10-01 04:21:34.959908', 'step': 7098, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:34.989828', 'step': 7098, 'epoch': 1} {'type': 'loss', 'content': 0.10934853553771973, 'timestamp': '2025-10-01 04:21:34.992182', 'step': 7099, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:35.025548', 'step': 7099, 'epoch': 1} {'type': 'loss', 'content': 0.16771872341632843, 'timestamp': '2025-10-01 04:21:35.049053', 'step': 7100, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:35.083751', 'step': 7100, 'epoch': 1} {'type': 'loss', 'content': 0.1624344289302826, 'timestamp': '2025-10-01 04:21:35.085820', 'step': 7101, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:35.118334', 'step': 7101, 'epoch': 1} {'type': 'loss', 'content': 0.1992601454257965, 'timestamp': '2025-10-01 04:21:35.120535', 'step': 7102, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:35.150020', 'step': 7102, 'epoch': 1} {'type': 'loss', 'content': 0.13816553354263306, 'timestamp': '2025-10-01 04:21:35.152312', 'step': 7103, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:21:35.183108', 'step': 7103, 'epoch': 1} {'type': 'loss', 'content': 0.18665814399719238, 'timestamp': '2025-10-01 04:21:35.207327', 'step': 7104, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:35.237618', 'step': 7104, 'epoch': 1} {'type': 'loss', 'content': 0.08845266699790955, 'timestamp': '2025-10-01 04:21:35.239576', 'step': 7105, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:35.270188', 'step': 7105, 'epoch': 1} {'type': 'loss', 'content': 0.16346657276153564, 'timestamp': '2025-10-01 04:21:35.272148', 'step': 7106, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:35.305893', 'step': 7106, 'epoch': 1} {'type': 'loss', 'content': 0.10951828211545944, 'timestamp': '2025-10-01 04:21:35.307806', 'step': 7107, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:21:35.337945', 'step': 7107, 'epoch': 1} {'type': 'loss', 'content': 0.17471644282341003, 'timestamp': '2025-10-01 04:21:35.363254', 'step': 7108, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:35.393062', 'step': 7108, 'epoch': 1} {'type': 'loss', 'content': 0.09379167854785919, 'timestamp': '2025-10-01 04:21:35.395257', 'step': 7109, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:21:35.440720', 'step': 7109, 'epoch': 1} {'type': 'loss', 'content': 0.12466515600681305, 'timestamp': '2025-10-01 04:21:35.442986', 'step': 7110, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:21:35.474641', 'step': 7110, 'epoch': 1} {'type': 'loss', 'content': 0.13642464578151703, 'timestamp': '2025-10-01 04:21:35.476979', 'step': 7111, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:35.514717', 'step': 7111, 'epoch': 1} {'type': 'loss', 'content': 0.2151462882757187, 'timestamp': '2025-10-01 04:21:35.538610', 'step': 7112, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:35.570404', 'step': 7112, 'epoch': 1} {'type': 'loss', 'content': 0.13732364773750305, 'timestamp': '2025-10-01 04:21:35.572831', 'step': 7113, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:35.602879', 'step': 7113, 'epoch': 1} {'type': 'loss', 'content': 0.08434037864208221, 'timestamp': '2025-10-01 04:21:35.606384', 'step': 7114, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:35.644163', 'step': 7114, 'epoch': 1} {'type': 'loss', 'content': 0.16482220590114594, 'timestamp': '2025-10-01 04:21:35.646898', 'step': 7115, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:35.677349', 'step': 7115, 'epoch': 1} {'type': 'loss', 'content': 0.09048981964588165, 'timestamp': '2025-10-01 04:21:35.704233', 'step': 7116, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:35.733628', 'step': 7116, 'epoch': 1} {'type': 'loss', 'content': 0.18932268023490906, 'timestamp': '2025-10-01 04:21:35.735878', 'step': 7117, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:35.766407', 'step': 7117, 'epoch': 1} {'type': 'loss', 'content': 0.25009721517562866, 'timestamp': '2025-10-01 04:21:35.768599', 'step': 7118, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:35.800694', 'step': 7118, 'epoch': 1} {'type': 'loss', 'content': 0.09665986895561218, 'timestamp': '2025-10-01 04:21:35.802510', 'step': 7119, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:35.838574', 'step': 7119, 'epoch': 1} {'type': 'loss', 'content': 0.10269148647785187, 'timestamp': '2025-10-01 04:21:35.861876', 'step': 7120, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:35.892285', 'step': 7120, 'epoch': 1} {'type': 'loss', 'content': 0.16096414625644684, 'timestamp': '2025-10-01 04:21:35.894483', 'step': 7121, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:35.925142', 'step': 7121, 'epoch': 1} {'type': 'loss', 'content': 0.10240640491247177, 'timestamp': '2025-10-01 04:21:35.927259', 'step': 7122, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:35.957315', 'step': 7122, 'epoch': 1} {'type': 'loss', 'content': 0.1429080218076706, 'timestamp': '2025-10-01 04:21:35.959533', 'step': 7123, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:35.990988', 'step': 7123, 'epoch': 1} {'type': 'loss', 'content': 0.1125282496213913, 'timestamp': '2025-10-01 04:21:36.014734', 'step': 7124, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:36.049888', 'step': 7124, 'epoch': 1} {'type': 'loss', 'content': 0.09971494227647781, 'timestamp': '2025-10-01 04:21:36.052038', 'step': 7125, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:36.083192', 'step': 7125, 'epoch': 1} {'type': 'loss', 'content': 0.09380662441253662, 'timestamp': '2025-10-01 04:21:36.085216', 'step': 7126, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:36.116532', 'step': 7126, 'epoch': 1} {'type': 'loss', 'content': 0.08450799435377121, 'timestamp': '2025-10-01 04:21:36.118571', 'step': 7127, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:36.159214', 'step': 7127, 'epoch': 1} {'type': 'loss', 'content': 0.23580166697502136, 'timestamp': '2025-10-01 04:21:36.182980', 'step': 7128, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:21:36.215181', 'step': 7128, 'epoch': 1} {'type': 'loss', 'content': 0.15428274869918823, 'timestamp': '2025-10-01 04:21:36.217031', 'step': 7129, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:36.247814', 'step': 7129, 'epoch': 1} {'type': 'loss', 'content': 0.2107970267534256, 'timestamp': '2025-10-01 04:21:36.249957', 'step': 7130, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:36.285196', 'step': 7130, 'epoch': 1} {'type': 'loss', 'content': 0.11124169081449509, 'timestamp': '2025-10-01 04:21:36.287164', 'step': 7131, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:36.318639', 'step': 7131, 'epoch': 1} {'type': 'loss', 'content': 0.1859661042690277, 'timestamp': '2025-10-01 04:21:36.342035', 'step': 7132, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:36.387252', 'step': 7132, 'epoch': 1} {'type': 'loss', 'content': 0.1412159949541092, 'timestamp': '2025-10-01 04:21:36.389633', 'step': 7133, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:36.421261', 'step': 7133, 'epoch': 1} {'type': 'loss', 'content': 0.1071866899728775, 'timestamp': '2025-10-01 04:21:36.424594', 'step': 7134, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:36.462006', 'step': 7134, 'epoch': 1} {'type': 'loss', 'content': 0.153583824634552, 'timestamp': '2025-10-01 04:21:36.464106', 'step': 7135, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:36.502117', 'step': 7135, 'epoch': 1} {'type': 'loss', 'content': 0.15000085532665253, 'timestamp': '2025-10-01 04:21:36.526209', 'step': 7136, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:36.560181', 'step': 7136, 'epoch': 1} {'type': 'loss', 'content': 0.09309235960245132, 'timestamp': '2025-10-01 04:21:36.562241', 'step': 7137, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:21:36.597126', 'step': 7137, 'epoch': 1} {'type': 'loss', 'content': 0.09230254590511322, 'timestamp': '2025-10-01 04:21:36.599548', 'step': 7138, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:21:36.631128', 'step': 7138, 'epoch': 1} {'type': 'loss', 'content': 0.0627017393708229, 'timestamp': '2025-10-01 04:21:36.633286', 'step': 7139, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:36.667394', 'step': 7139, 'epoch': 1} {'type': 'loss', 'content': 0.1211533471941948, 'timestamp': '2025-10-01 04:21:36.690664', 'step': 7140, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:36.726654', 'step': 7140, 'epoch': 1} {'type': 'loss', 'content': 0.10499781370162964, 'timestamp': '2025-10-01 04:21:36.728806', 'step': 7141, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:36.779502', 'step': 7141, 'epoch': 1} {'type': 'loss', 'content': 0.09980284422636032, 'timestamp': '2025-10-01 04:21:36.781527', 'step': 7142, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:36.816695', 'step': 7142, 'epoch': 1} {'type': 'loss', 'content': 0.17441695928573608, 'timestamp': '2025-10-01 04:21:36.818751', 'step': 7143, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:36.853794', 'step': 7143, 'epoch': 1} {'type': 'loss', 'content': 0.15637856721878052, 'timestamp': '2025-10-01 04:21:36.877317', 'step': 7144, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:36.912996', 'step': 7144, 'epoch': 1} {'type': 'loss', 'content': 0.08324756473302841, 'timestamp': '2025-10-01 04:21:36.915354', 'step': 7145, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:21:36.964703', 'step': 7145, 'epoch': 1} {'type': 'loss', 'content': 0.1694137006998062, 'timestamp': '2025-10-01 04:21:36.967131', 'step': 7146, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:21:37.005179', 'step': 7146, 'epoch': 1} {'type': 'loss', 'content': 0.09671692550182343, 'timestamp': '2025-10-01 04:21:37.007505', 'step': 7147, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:37.037974', 'step': 7147, 'epoch': 1} {'type': 'loss', 'content': 0.10024422407150269, 'timestamp': '2025-10-01 04:21:37.061534', 'step': 7148, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:21:37.092880', 'step': 7148, 'epoch': 1} {'type': 'loss', 'content': 0.1756831705570221, 'timestamp': '2025-10-01 04:21:37.094867', 'step': 7149, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:37.124930', 'step': 7149, 'epoch': 1} {'type': 'loss', 'content': 0.13275383412837982, 'timestamp': '2025-10-01 04:21:37.126720', 'step': 7150, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:37.157387', 'step': 7150, 'epoch': 1} {'type': 'loss', 'content': 0.11663570255041122, 'timestamp': '2025-10-01 04:21:37.159239', 'step': 7151, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:37.189737', 'step': 7151, 'epoch': 1} {'type': 'loss', 'content': 0.23012101650238037, 'timestamp': '2025-10-01 04:21:37.213113', 'step': 7152, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:37.243964', 'step': 7152, 'epoch': 1} {'type': 'loss', 'content': 0.1034562960267067, 'timestamp': '2025-10-01 04:21:37.246029', 'step': 7153, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:21:37.277188', 'step': 7153, 'epoch': 1} {'type': 'loss', 'content': 0.11875347793102264, 'timestamp': '2025-10-01 04:21:37.279483', 'step': 7154, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:21:37.323941', 'step': 7154, 'epoch': 1} {'type': 'loss', 'content': 0.10928969085216522, 'timestamp': '2025-10-01 04:21:37.326563', 'step': 7155, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:37.357574', 'step': 7155, 'epoch': 1} {'type': 'loss', 'content': 0.12808652222156525, 'timestamp': '2025-10-01 04:21:37.380891', 'step': 7156, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:37.412859', 'step': 7156, 'epoch': 1} {'type': 'loss', 'content': 0.12622246146202087, 'timestamp': '2025-10-01 04:21:37.415021', 'step': 7157, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-10-01 04:21:37.456119', 'step': 7157, 'epoch': 1} {'type': 'loss', 'content': 0.16826483607292175, 'timestamp': '2025-10-01 04:21:37.460820', 'step': 7158, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:21:37.495724', 'step': 7158, 'epoch': 1} {'type': 'loss', 'content': 0.18840844929218292, 'timestamp': '2025-10-01 04:21:37.497790', 'step': 7159, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:37.540297', 'step': 7159, 'epoch': 1} {'type': 'loss', 'content': 0.09905047714710236, 'timestamp': '2025-10-01 04:21:37.564135', 'step': 7160, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:37.607868', 'step': 7160, 'epoch': 1} {'type': 'loss', 'content': 0.25784435868263245, 'timestamp': '2025-10-01 04:21:37.609916', 'step': 7161, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:37.654097', 'step': 7161, 'epoch': 1} {'type': 'loss', 'content': 0.156806081533432, 'timestamp': '2025-10-01 04:21:37.666087', 'step': 7162, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:37.698543', 'step': 7162, 'epoch': 1} {'type': 'loss', 'content': 0.17233020067214966, 'timestamp': '2025-10-01 04:21:37.700526', 'step': 7163, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:37.732388', 'step': 7163, 'epoch': 1} {'type': 'loss', 'content': 0.1360713243484497, 'timestamp': '2025-10-01 04:21:37.756086', 'step': 7164, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:37.804151', 'step': 7164, 'epoch': 1} {'type': 'loss', 'content': 0.15681615471839905, 'timestamp': '2025-10-01 04:21:37.806128', 'step': 7165, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:37.840650', 'step': 7165, 'epoch': 1} {'type': 'loss', 'content': 0.11386849731206894, 'timestamp': '2025-10-01 04:21:37.842627', 'step': 7166, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:37.888016', 'step': 7166, 'epoch': 1} {'type': 'loss', 'content': 0.2645559310913086, 'timestamp': '2025-10-01 04:21:37.890131', 'step': 7167, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:21:37.923968', 'step': 7167, 'epoch': 1} {'type': 'loss', 'content': 0.10097337514162064, 'timestamp': '2025-10-01 04:21:37.947780', 'step': 7168, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:21:37.984009', 'step': 7168, 'epoch': 1} {'type': 'loss', 'content': 0.18560533225536346, 'timestamp': '2025-10-01 04:21:37.987542', 'step': 7169, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:21:38.029276', 'step': 7169, 'epoch': 1} {'type': 'loss', 'content': 0.09645968675613403, 'timestamp': '2025-10-01 04:21:38.031421', 'step': 7170, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:38.064940', 'step': 7170, 'epoch': 1} {'type': 'loss', 'content': 0.13253918290138245, 'timestamp': '2025-10-01 04:21:38.066891', 'step': 7171, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:38.101977', 'step': 7171, 'epoch': 1} {'type': 'loss', 'content': 0.16885881125926971, 'timestamp': '2025-10-01 04:21:38.125646', 'step': 7172, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:38.163592', 'step': 7172, 'epoch': 1} {'type': 'loss', 'content': 0.15526297688484192, 'timestamp': '2025-10-01 04:21:38.165759', 'step': 7173, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:38.197625', 'step': 7173, 'epoch': 1} {'type': 'loss', 'content': 0.13267624378204346, 'timestamp': '2025-10-01 04:21:38.201569', 'step': 7174, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:38.240988', 'step': 7174, 'epoch': 1} {'type': 'loss', 'content': 0.12482401728630066, 'timestamp': '2025-10-01 04:21:38.243496', 'step': 7175, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:38.278220', 'step': 7175, 'epoch': 1} {'type': 'loss', 'content': 0.16518405079841614, 'timestamp': '2025-10-01 04:21:38.304621', 'step': 7176, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:38.344128', 'step': 7176, 'epoch': 1} {'type': 'loss', 'content': 0.20725099742412567, 'timestamp': '2025-10-01 04:21:38.346299', 'step': 7177, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:38.379463', 'step': 7177, 'epoch': 1} {'type': 'loss', 'content': 0.09824158996343613, 'timestamp': '2025-10-01 04:21:38.387464', 'step': 7178, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:38.419436', 'step': 7178, 'epoch': 1} {'type': 'loss', 'content': 0.08305926620960236, 'timestamp': '2025-10-01 04:21:38.421961', 'step': 7179, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:38.455667', 'step': 7179, 'epoch': 1} {'type': 'loss', 'content': 0.14327244460582733, 'timestamp': '2025-10-01 04:21:38.479192', 'step': 7180, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:38.510862', 'step': 7180, 'epoch': 1} {'type': 'loss', 'content': 0.2273278832435608, 'timestamp': '2025-10-01 04:21:38.513026', 'step': 7181, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:38.545975', 'step': 7181, 'epoch': 1} {'type': 'loss', 'content': 0.1351270079612732, 'timestamp': '2025-10-01 04:21:38.548149', 'step': 7182, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:38.582157', 'step': 7182, 'epoch': 1} {'type': 'loss', 'content': 0.14388403296470642, 'timestamp': '2025-10-01 04:21:38.589676', 'step': 7183, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:21:38.625123', 'step': 7183, 'epoch': 1} {'type': 'loss', 'content': 0.20161953568458557, 'timestamp': '2025-10-01 04:21:38.648787', 'step': 7184, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:38.681476', 'step': 7184, 'epoch': 1} {'type': 'loss', 'content': 0.1768849790096283, 'timestamp': '2025-10-01 04:21:38.683667', 'step': 7185, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:38.730790', 'step': 7185, 'epoch': 1} {'type': 'loss', 'content': 0.10249391943216324, 'timestamp': '2025-10-01 04:21:38.733414', 'step': 7186, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:38.772013', 'step': 7186, 'epoch': 1} {'type': 'loss', 'content': 0.17588061094284058, 'timestamp': '2025-10-01 04:21:38.774085', 'step': 7187, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:38.814208', 'step': 7187, 'epoch': 1} {'type': 'loss', 'content': 0.1387673020362854, 'timestamp': '2025-10-01 04:21:38.837959', 'step': 7188, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:38.869857', 'step': 7188, 'epoch': 1} {'type': 'loss', 'content': 0.24249416589736938, 'timestamp': '2025-10-01 04:21:38.872059', 'step': 7189, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:38.904028', 'step': 7189, 'epoch': 1} {'type': 'loss', 'content': 0.11478520929813385, 'timestamp': '2025-10-01 04:21:38.906346', 'step': 7190, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:38.937228', 'step': 7190, 'epoch': 1} {'type': 'loss', 'content': 0.15813633799552917, 'timestamp': '2025-10-01 04:21:38.939471', 'step': 7191, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:38.971612', 'step': 7191, 'epoch': 1} {'type': 'loss', 'content': 0.1906770020723343, 'timestamp': '2025-10-01 04:21:38.995464', 'step': 7192, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:21:39.040306', 'step': 7192, 'epoch': 1} {'type': 'loss', 'content': 0.1802324801683426, 'timestamp': '2025-10-01 04:21:39.042404', 'step': 7193, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:39.078132', 'step': 7193, 'epoch': 1} {'type': 'loss', 'content': 0.1587039828300476, 'timestamp': '2025-10-01 04:21:39.080281', 'step': 7194, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:39.115011', 'step': 7194, 'epoch': 1} {'type': 'loss', 'content': 0.18315961956977844, 'timestamp': '2025-10-01 04:21:39.117165', 'step': 7195, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:21:39.164860', 'step': 7195, 'epoch': 1} {'type': 'loss', 'content': 0.16306659579277039, 'timestamp': '2025-10-01 04:21:39.188441', 'step': 7196, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:39.220159', 'step': 7196, 'epoch': 1} {'type': 'loss', 'content': 0.19698300957679749, 'timestamp': '2025-10-01 04:21:39.222336', 'step': 7197, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:39.257469', 'step': 7197, 'epoch': 1} {'type': 'loss', 'content': 0.07042046636343002, 'timestamp': '2025-10-01 04:21:39.259645', 'step': 7198, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:21:39.292313', 'step': 7198, 'epoch': 1} {'type': 'loss', 'content': 0.13641715049743652, 'timestamp': '2025-10-01 04:21:39.297736', 'step': 7199, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:39.335963', 'step': 7199, 'epoch': 1} {'type': 'loss', 'content': 0.13196757435798645, 'timestamp': '2025-10-01 04:21:39.359520', 'step': 7200, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:39.397394', 'step': 7200, 'epoch': 1} {'type': 'loss', 'content': 0.06647606194019318, 'timestamp': '2025-10-01 04:21:39.399388', 'step': 7201, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:21:39.439899', 'step': 7201, 'epoch': 1} {'type': 'loss', 'content': 0.14186500012874603, 'timestamp': '2025-10-01 04:21:39.442319', 'step': 7202, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:39.479074', 'step': 7202, 'epoch': 1} {'type': 'loss', 'content': 0.2375003546476364, 'timestamp': '2025-10-01 04:21:39.487099', 'step': 7203, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:39.523319', 'step': 7203, 'epoch': 1} {'type': 'loss', 'content': 0.16495634615421295, 'timestamp': '2025-10-01 04:21:39.547588', 'step': 7204, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:39.579706', 'step': 7204, 'epoch': 1} {'type': 'loss', 'content': 0.22949109971523285, 'timestamp': '2025-10-01 04:21:39.581902', 'step': 7205, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:39.622167', 'step': 7205, 'epoch': 1} {'type': 'loss', 'content': 0.12354864180088043, 'timestamp': '2025-10-01 04:21:39.624382', 'step': 7206, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:39.666667', 'step': 7206, 'epoch': 1} {'type': 'loss', 'content': 0.20753683149814606, 'timestamp': '2025-10-01 04:21:39.668887', 'step': 7207, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:39.704019', 'step': 7207, 'epoch': 1} {'type': 'loss', 'content': 0.15859472751617432, 'timestamp': '2025-10-01 04:21:39.727700', 'step': 7208, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:21:39.764213', 'step': 7208, 'epoch': 1} {'type': 'loss', 'content': 0.14333942532539368, 'timestamp': '2025-10-01 04:21:39.766365', 'step': 7209, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:21:39.801231', 'step': 7209, 'epoch': 1} {'type': 'loss', 'content': 0.12129881232976913, 'timestamp': '2025-10-01 04:21:39.805710', 'step': 7210, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:39.840197', 'step': 7210, 'epoch': 1} {'type': 'loss', 'content': 0.08761321753263474, 'timestamp': '2025-10-01 04:21:39.842282', 'step': 7211, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:39.904297', 'step': 7211, 'epoch': 1} {'type': 'loss', 'content': 0.11761881411075592, 'timestamp': '2025-10-01 04:21:39.927933', 'step': 7212, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:39.966109', 'step': 7212, 'epoch': 1} {'type': 'loss', 'content': 0.14319349825382233, 'timestamp': '2025-10-01 04:21:39.968450', 'step': 7213, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:40.024067', 'step': 7213, 'epoch': 1} {'type': 'loss', 'content': 0.15460234880447388, 'timestamp': '2025-10-01 04:21:40.026261', 'step': 7214, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:40.068860', 'step': 7214, 'epoch': 1} {'type': 'loss', 'content': 0.11628346145153046, 'timestamp': '2025-10-01 04:21:40.071575', 'step': 7215, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:40.116923', 'step': 7215, 'epoch': 1} {'type': 'loss', 'content': 0.19989579916000366, 'timestamp': '2025-10-01 04:21:40.142837', 'step': 7216, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:40.177444', 'step': 7216, 'epoch': 1} {'type': 'loss', 'content': 0.1722906231880188, 'timestamp': '2025-10-01 04:21:40.179935', 'step': 7217, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:40.224098', 'step': 7217, 'epoch': 1} {'type': 'loss', 'content': 0.08011459559202194, 'timestamp': '2025-10-01 04:21:40.226819', 'step': 7218, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:40.263454', 'step': 7218, 'epoch': 1} {'type': 'loss', 'content': 0.08421096950769424, 'timestamp': '2025-10-01 04:21:40.266370', 'step': 7219, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:21:40.302098', 'step': 7219, 'epoch': 1} {'type': 'loss', 'content': 0.1498185098171234, 'timestamp': '2025-10-01 04:21:40.325878', 'step': 7220, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:40.359470', 'step': 7220, 'epoch': 1} {'type': 'loss', 'content': 0.14352786540985107, 'timestamp': '2025-10-01 04:21:40.361549', 'step': 7221, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:40.403110', 'step': 7221, 'epoch': 1} {'type': 'loss', 'content': 0.1313229650259018, 'timestamp': '2025-10-01 04:21:40.405155', 'step': 7222, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:40.447297', 'step': 7222, 'epoch': 1} {'type': 'loss', 'content': 0.15727078914642334, 'timestamp': '2025-10-01 04:21:40.449869', 'step': 7223, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:21:40.484243', 'step': 7223, 'epoch': 1} {'type': 'loss', 'content': 0.09861533343791962, 'timestamp': '2025-10-01 04:21:40.508334', 'step': 7224, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:40.543462', 'step': 7224, 'epoch': 1} {'type': 'loss', 'content': 0.1978897601366043, 'timestamp': '2025-10-01 04:21:40.545857', 'step': 7225, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:40.582616', 'step': 7225, 'epoch': 1} {'type': 'loss', 'content': 0.15458105504512787, 'timestamp': '2025-10-01 04:21:40.585028', 'step': 7226, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:40.620454', 'step': 7226, 'epoch': 1} {'type': 'loss', 'content': 0.1379433423280716, 'timestamp': '2025-10-01 04:21:40.623018', 'step': 7227, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:40.657051', 'step': 7227, 'epoch': 1} {'type': 'loss', 'content': 0.09589457511901855, 'timestamp': '2025-10-01 04:21:40.704360', 'step': 7228, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:21:40.737668', 'step': 7228, 'epoch': 1} {'type': 'loss', 'content': 0.16112272441387177, 'timestamp': '2025-10-01 04:21:40.740528', 'step': 7229, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:40.773446', 'step': 7229, 'epoch': 1} {'type': 'loss', 'content': 0.10976318269968033, 'timestamp': '2025-10-01 04:21:40.776097', 'step': 7230, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:40.818383', 'step': 7230, 'epoch': 1} {'type': 'loss', 'content': 0.17214317619800568, 'timestamp': '2025-10-01 04:21:40.820896', 'step': 7231, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:40.852376', 'step': 7231, 'epoch': 1} {'type': 'loss', 'content': 0.10423579812049866, 'timestamp': '2025-10-01 04:21:40.876457', 'step': 7232, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:40.908867', 'step': 7232, 'epoch': 1} {'type': 'loss', 'content': 0.10761961340904236, 'timestamp': '2025-10-01 04:21:40.911295', 'step': 7233, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:40.943845', 'step': 7233, 'epoch': 1} {'type': 'loss', 'content': 0.08996015042066574, 'timestamp': '2025-10-01 04:21:40.946531', 'step': 7234, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:40.978989', 'step': 7234, 'epoch': 1} {'type': 'loss', 'content': 0.06377284228801727, 'timestamp': '2025-10-01 04:21:40.981658', 'step': 7235, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:41.013264', 'step': 7235, 'epoch': 1} {'type': 'loss', 'content': 0.0895557627081871, 'timestamp': '2025-10-01 04:21:41.037187', 'step': 7236, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:41.078176', 'step': 7236, 'epoch': 1} {'type': 'loss', 'content': 0.12288876622915268, 'timestamp': '2025-10-01 04:21:41.080167', 'step': 7237, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:41.115944', 'step': 7237, 'epoch': 1} {'type': 'loss', 'content': 0.06427022069692612, 'timestamp': '2025-10-01 04:21:41.118509', 'step': 7238, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:41.154802', 'step': 7238, 'epoch': 1} {'type': 'loss', 'content': 0.13690511882305145, 'timestamp': '2025-10-01 04:21:41.156981', 'step': 7239, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:21:41.199023', 'step': 7239, 'epoch': 1} {'type': 'loss', 'content': 0.22563323378562927, 'timestamp': '2025-10-01 04:21:41.228881', 'step': 7240, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:21:41.263009', 'step': 7240, 'epoch': 1} {'type': 'loss', 'content': 0.11641361564397812, 'timestamp': '2025-10-01 04:21:41.270970', 'step': 7241, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:41.305316', 'step': 7241, 'epoch': 1} {'type': 'loss', 'content': 0.07168178260326385, 'timestamp': '2025-10-01 04:21:41.307969', 'step': 7242, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:41.359166', 'step': 7242, 'epoch': 1} {'type': 'loss', 'content': 0.21346664428710938, 'timestamp': '2025-10-01 04:21:41.362910', 'step': 7243, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:41.398679', 'step': 7243, 'epoch': 1} {'type': 'loss', 'content': 0.10211072117090225, 'timestamp': '2025-10-01 04:21:41.422449', 'step': 7244, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:41.465355', 'step': 7244, 'epoch': 1} {'type': 'loss', 'content': 0.17983901500701904, 'timestamp': '2025-10-01 04:21:41.467593', 'step': 7245, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:41.514014', 'step': 7245, 'epoch': 1} {'type': 'loss', 'content': 0.146305650472641, 'timestamp': '2025-10-01 04:21:41.516106', 'step': 7246, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:41.568465', 'step': 7246, 'epoch': 1} {'type': 'loss', 'content': 0.08928198367357254, 'timestamp': '2025-10-01 04:21:41.570631', 'step': 7247, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:41.616998', 'step': 7247, 'epoch': 1} {'type': 'loss', 'content': 0.20065200328826904, 'timestamp': '2025-10-01 04:21:41.640666', 'step': 7248, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:21:41.683687', 'step': 7248, 'epoch': 1} {'type': 'loss', 'content': 0.09446645528078079, 'timestamp': '2025-10-01 04:21:41.685880', 'step': 7249, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:41.740758', 'step': 7249, 'epoch': 1} {'type': 'loss', 'content': 0.10615243017673492, 'timestamp': '2025-10-01 04:21:41.743353', 'step': 7250, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:41.796068', 'step': 7250, 'epoch': 1} {'type': 'loss', 'content': 0.1269148290157318, 'timestamp': '2025-10-01 04:21:41.798359', 'step': 7251, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:41.853960', 'step': 7251, 'epoch': 1} {'type': 'loss', 'content': 0.10131679475307465, 'timestamp': '2025-10-01 04:21:41.880377', 'step': 7252, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:41.924172', 'step': 7252, 'epoch': 1} {'type': 'loss', 'content': 0.22394925355911255, 'timestamp': '2025-10-01 04:21:41.926321', 'step': 7253, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:21:41.971939', 'step': 7253, 'epoch': 1} {'type': 'loss', 'content': 0.13253282010555267, 'timestamp': '2025-10-01 04:21:41.974446', 'step': 7254, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:42.022126', 'step': 7254, 'epoch': 1} {'type': 'loss', 'content': 0.09788067638874054, 'timestamp': '2025-10-01 04:21:42.024446', 'step': 7255, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:42.078604', 'step': 7255, 'epoch': 1} {'type': 'loss', 'content': 0.13828207552433014, 'timestamp': '2025-10-01 04:21:42.102284', 'step': 7256, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:42.142685', 'step': 7256, 'epoch': 1} {'type': 'loss', 'content': 0.1319180577993393, 'timestamp': '2025-10-01 04:21:42.145515', 'step': 7257, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:42.197996', 'step': 7257, 'epoch': 1} {'type': 'loss', 'content': 0.16503849625587463, 'timestamp': '2025-10-01 04:21:42.200654', 'step': 7258, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:42.238389', 'step': 7258, 'epoch': 1} {'type': 'loss', 'content': 0.16601301729679108, 'timestamp': '2025-10-01 04:21:42.240984', 'step': 7259, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:42.278682', 'step': 7259, 'epoch': 1} {'type': 'loss', 'content': 0.23173785209655762, 'timestamp': '2025-10-01 04:21:42.302399', 'step': 7260, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:42.354471', 'step': 7260, 'epoch': 1} {'type': 'loss', 'content': 0.23355954885482788, 'timestamp': '2025-10-01 04:21:42.359557', 'step': 7261, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:42.410799', 'step': 7261, 'epoch': 1} {'type': 'loss', 'content': 0.10642346739768982, 'timestamp': '2025-10-01 04:21:42.413156', 'step': 7262, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:42.448988', 'step': 7262, 'epoch': 1} {'type': 'loss', 'content': 0.1533798724412918, 'timestamp': '2025-10-01 04:21:42.451245', 'step': 7263, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:42.483812', 'step': 7263, 'epoch': 1} {'type': 'loss', 'content': 0.12179625034332275, 'timestamp': '2025-10-01 04:21:42.507514', 'step': 7264, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:42.544578', 'step': 7264, 'epoch': 1} {'type': 'loss', 'content': 0.20431041717529297, 'timestamp': '2025-10-01 04:21:42.546833', 'step': 7265, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:42.610746', 'step': 7265, 'epoch': 1} {'type': 'loss', 'content': 0.14615990221500397, 'timestamp': '2025-10-01 04:21:42.612948', 'step': 7266, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:42.643590', 'step': 7266, 'epoch': 1} {'type': 'loss', 'content': 0.05071983113884926, 'timestamp': '2025-10-01 04:21:42.645636', 'step': 7267, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:42.681837', 'step': 7267, 'epoch': 1} {'type': 'loss', 'content': 0.13882216811180115, 'timestamp': '2025-10-01 04:21:42.705345', 'step': 7268, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:42.739952', 'step': 7268, 'epoch': 1} {'type': 'loss', 'content': 0.23117074370384216, 'timestamp': '2025-10-01 04:21:42.742002', 'step': 7269, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:42.778524', 'step': 7269, 'epoch': 1} {'type': 'loss', 'content': 0.1594778299331665, 'timestamp': '2025-10-01 04:21:42.780493', 'step': 7270, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:42.811032', 'step': 7270, 'epoch': 1} {'type': 'loss', 'content': 0.20163318514823914, 'timestamp': '2025-10-01 04:21:42.813027', 'step': 7271, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:21:42.844763', 'step': 7271, 'epoch': 1} {'type': 'loss', 'content': 0.15483033657073975, 'timestamp': '2025-10-01 04:21:42.868468', 'step': 7272, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:42.905666', 'step': 7272, 'epoch': 1} {'type': 'loss', 'content': 0.1559058129787445, 'timestamp': '2025-10-01 04:21:42.907904', 'step': 7273, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:42.946318', 'step': 7273, 'epoch': 1} {'type': 'loss', 'content': 0.1394420713186264, 'timestamp': '2025-10-01 04:21:42.948432', 'step': 7274, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:42.984434', 'step': 7274, 'epoch': 1} {'type': 'loss', 'content': 0.07564186304807663, 'timestamp': '2025-10-01 04:21:42.986571', 'step': 7275, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:43.024950', 'step': 7275, 'epoch': 1} {'type': 'loss', 'content': 0.18475684523582458, 'timestamp': '2025-10-01 04:21:43.048717', 'step': 7276, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:43.088690', 'step': 7276, 'epoch': 1} {'type': 'loss', 'content': 0.0864742174744606, 'timestamp': '2025-10-01 04:21:43.091252', 'step': 7277, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:21:43.123072', 'step': 7277, 'epoch': 1} {'type': 'loss', 'content': 0.233987495303154, 'timestamp': '2025-10-01 04:21:43.133255', 'step': 7278, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:43.174214', 'step': 7278, 'epoch': 1} {'type': 'loss', 'content': 0.10924647003412247, 'timestamp': '2025-10-01 04:21:43.176431', 'step': 7279, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:43.208559', 'step': 7279, 'epoch': 1} {'type': 'loss', 'content': 0.21726247668266296, 'timestamp': '2025-10-01 04:21:43.232085', 'step': 7280, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:43.266040', 'step': 7280, 'epoch': 1} {'type': 'loss', 'content': 0.2702232599258423, 'timestamp': '2025-10-01 04:21:43.267986', 'step': 7281, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:43.298496', 'step': 7281, 'epoch': 1} {'type': 'loss', 'content': 0.10718699544668198, 'timestamp': '2025-10-01 04:21:43.300624', 'step': 7282, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:43.333558', 'step': 7282, 'epoch': 1} {'type': 'loss', 'content': 0.13771632313728333, 'timestamp': '2025-10-01 04:21:43.335666', 'step': 7283, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:43.366882', 'step': 7283, 'epoch': 1} {'type': 'loss', 'content': 0.040881507098674774, 'timestamp': '2025-10-01 04:21:43.390917', 'step': 7284, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:43.423607', 'step': 7284, 'epoch': 1} {'type': 'loss', 'content': 0.06128769367933273, 'timestamp': '2025-10-01 04:21:43.425566', 'step': 7285, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:21:43.457103', 'step': 7285, 'epoch': 1} {'type': 'loss', 'content': 0.13265664875507355, 'timestamp': '2025-10-01 04:21:43.459964', 'step': 7286, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:43.490389', 'step': 7286, 'epoch': 1} {'type': 'loss', 'content': 0.09704608470201492, 'timestamp': '2025-10-01 04:21:43.493201', 'step': 7287, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:43.525983', 'step': 7287, 'epoch': 1} {'type': 'loss', 'content': 0.12424249202013016, 'timestamp': '2025-10-01 04:21:43.554280', 'step': 7288, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:21:43.586137', 'step': 7288, 'epoch': 1} {'type': 'loss', 'content': 0.08339700847864151, 'timestamp': '2025-10-01 04:21:43.588211', 'step': 7289, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:43.620214', 'step': 7289, 'epoch': 1} {'type': 'loss', 'content': 0.1568768173456192, 'timestamp': '2025-10-01 04:21:43.623745', 'step': 7290, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:43.655565', 'step': 7290, 'epoch': 1} {'type': 'loss', 'content': 0.14057093858718872, 'timestamp': '2025-10-01 04:21:43.671929', 'step': 7291, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:43.702392', 'step': 7291, 'epoch': 1} {'type': 'loss', 'content': 0.17635299265384674, 'timestamp': '2025-10-01 04:21:43.726302', 'step': 7292, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:43.759681', 'step': 7292, 'epoch': 1} {'type': 'loss', 'content': 0.1559286266565323, 'timestamp': '2025-10-01 04:21:43.761847', 'step': 7293, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:43.793295', 'step': 7293, 'epoch': 1} {'type': 'loss', 'content': 0.12888334691524506, 'timestamp': '2025-10-01 04:21:43.795446', 'step': 7294, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:43.833256', 'step': 7294, 'epoch': 1} {'type': 'loss', 'content': 0.1593543291091919, 'timestamp': '2025-10-01 04:21:43.835439', 'step': 7295, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:43.875803', 'step': 7295, 'epoch': 1} {'type': 'loss', 'content': 0.07350977510213852, 'timestamp': '2025-10-01 04:21:43.899350', 'step': 7296, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:43.940286', 'step': 7296, 'epoch': 1} {'type': 'loss', 'content': 0.14161691069602966, 'timestamp': '2025-10-01 04:21:43.942516', 'step': 7297, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:43.974223', 'step': 7297, 'epoch': 1} {'type': 'loss', 'content': 0.14996130764484406, 'timestamp': '2025-10-01 04:21:43.976454', 'step': 7298, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:44.011387', 'step': 7298, 'epoch': 1} {'type': 'loss', 'content': 0.09643866866827011, 'timestamp': '2025-10-01 04:21:44.024860', 'step': 7299, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:44.061008', 'step': 7299, 'epoch': 1} {'type': 'loss', 'content': 0.13111747801303864, 'timestamp': '2025-10-01 04:21:44.097779', 'step': 7300, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:44.133234', 'step': 7300, 'epoch': 1} {'type': 'loss', 'content': 0.17401251196861267, 'timestamp': '2025-10-01 04:21:44.139358', 'step': 7301, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:44.174796', 'step': 7301, 'epoch': 1} {'type': 'loss', 'content': 0.15165790915489197, 'timestamp': '2025-10-01 04:21:44.176960', 'step': 7302, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:21:44.209803', 'step': 7302, 'epoch': 1} {'type': 'loss', 'content': 0.14755995571613312, 'timestamp': '2025-10-01 04:21:44.212203', 'step': 7303, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:44.244603', 'step': 7303, 'epoch': 1} {'type': 'loss', 'content': 0.0902879387140274, 'timestamp': '2025-10-01 04:21:44.268134', 'step': 7304, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:21:44.305804', 'step': 7304, 'epoch': 1} {'type': 'loss', 'content': 0.145829975605011, 'timestamp': '2025-10-01 04:21:44.308027', 'step': 7305, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:44.341029', 'step': 7305, 'epoch': 1} {'type': 'loss', 'content': 0.1753315031528473, 'timestamp': '2025-10-01 04:21:44.343740', 'step': 7306, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:44.394126', 'step': 7306, 'epoch': 1} {'type': 'loss', 'content': 0.11582859605550766, 'timestamp': '2025-10-01 04:21:44.398313', 'step': 7307, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:44.433282', 'step': 7307, 'epoch': 1} {'type': 'loss', 'content': 0.16031239926815033, 'timestamp': '2025-10-01 04:21:44.457525', 'step': 7308, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:44.495126', 'step': 7308, 'epoch': 1} {'type': 'loss', 'content': 0.17921844124794006, 'timestamp': '2025-10-01 04:21:44.497245', 'step': 7309, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:44.533461', 'step': 7309, 'epoch': 1} {'type': 'loss', 'content': 0.18047945201396942, 'timestamp': '2025-10-01 04:21:44.542756', 'step': 7310, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:44.574334', 'step': 7310, 'epoch': 1} {'type': 'loss', 'content': 0.15722912549972534, 'timestamp': '2025-10-01 04:21:44.576585', 'step': 7311, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:44.608064', 'step': 7311, 'epoch': 1} {'type': 'loss', 'content': 0.16893772780895233, 'timestamp': '2025-10-01 04:21:44.631742', 'step': 7312, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:21:44.667849', 'step': 7312, 'epoch': 1} {'type': 'loss', 'content': 0.1543823778629303, 'timestamp': '2025-10-01 04:21:44.670743', 'step': 7313, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:44.703800', 'step': 7313, 'epoch': 1} {'type': 'loss', 'content': 0.16310620307922363, 'timestamp': '2025-10-01 04:21:44.708959', 'step': 7314, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:44.739811', 'step': 7314, 'epoch': 1} {'type': 'loss', 'content': 0.19159969687461853, 'timestamp': '2025-10-01 04:21:44.742656', 'step': 7315, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:44.776516', 'step': 7315, 'epoch': 1} {'type': 'loss', 'content': 0.10561476647853851, 'timestamp': '2025-10-01 04:21:44.800005', 'step': 7316, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:21:44.843716', 'step': 7316, 'epoch': 1} {'type': 'loss', 'content': 0.1964542120695114, 'timestamp': '2025-10-01 04:21:44.846103', 'step': 7317, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:21:44.885613', 'step': 7317, 'epoch': 1} {'type': 'loss', 'content': 0.1812288463115692, 'timestamp': '2025-10-01 04:21:44.891994', 'step': 7318, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:44.928077', 'step': 7318, 'epoch': 1} {'type': 'loss', 'content': 0.11088091135025024, 'timestamp': '2025-10-01 04:21:44.930241', 'step': 7319, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:44.979048', 'step': 7319, 'epoch': 1} {'type': 'loss', 'content': 0.1983659714460373, 'timestamp': '2025-10-01 04:21:45.003173', 'step': 7320, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:45.035967', 'step': 7320, 'epoch': 1} {'type': 'loss', 'content': 0.2602231502532959, 'timestamp': '2025-10-01 04:21:45.038358', 'step': 7321, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:45.085710', 'step': 7321, 'epoch': 1} {'type': 'loss', 'content': 0.19595293700695038, 'timestamp': '2025-10-01 04:21:45.089977', 'step': 7322, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:45.121946', 'step': 7322, 'epoch': 1} {'type': 'loss', 'content': 0.14615565538406372, 'timestamp': '2025-10-01 04:21:45.124082', 'step': 7323, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:45.155693', 'step': 7323, 'epoch': 1} {'type': 'loss', 'content': 0.09080817550420761, 'timestamp': '2025-10-01 04:21:45.180770', 'step': 7324, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:45.217231', 'step': 7324, 'epoch': 1} {'type': 'loss', 'content': 0.10206203907728195, 'timestamp': '2025-10-01 04:21:45.224101', 'step': 7325, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:45.257113', 'step': 7325, 'epoch': 1} {'type': 'loss', 'content': 0.14578863978385925, 'timestamp': '2025-10-01 04:21:45.261808', 'step': 7326, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:45.299549', 'step': 7326, 'epoch': 1} {'type': 'loss', 'content': 0.1173400729894638, 'timestamp': '2025-10-01 04:21:45.304188', 'step': 7327, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:45.336077', 'step': 7327, 'epoch': 1} {'type': 'loss', 'content': 0.1642644852399826, 'timestamp': '2025-10-01 04:21:45.367816', 'step': 7328, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:45.407863', 'step': 7328, 'epoch': 1} {'type': 'loss', 'content': 0.1032552644610405, 'timestamp': '2025-10-01 04:21:45.410016', 'step': 7329, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:45.442452', 'step': 7329, 'epoch': 1} {'type': 'loss', 'content': 0.20444081723690033, 'timestamp': '2025-10-01 04:21:45.444907', 'step': 7330, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:45.491701', 'step': 7330, 'epoch': 1} {'type': 'loss', 'content': 0.11082920432090759, 'timestamp': '2025-10-01 04:21:45.495372', 'step': 7331, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:45.538894', 'step': 7331, 'epoch': 1} {'type': 'loss', 'content': 0.15427175164222717, 'timestamp': '2025-10-01 04:21:45.562404', 'step': 7332, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:45.593294', 'step': 7332, 'epoch': 1} {'type': 'loss', 'content': 0.15115778148174286, 'timestamp': '2025-10-01 04:21:45.595756', 'step': 7333, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:45.631525', 'step': 7333, 'epoch': 1} {'type': 'loss', 'content': 0.0936708003282547, 'timestamp': '2025-10-01 04:21:45.638431', 'step': 7334, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:45.672522', 'step': 7334, 'epoch': 1} {'type': 'loss', 'content': 0.09469884634017944, 'timestamp': '2025-10-01 04:21:45.677190', 'step': 7335, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:45.718784', 'step': 7335, 'epoch': 1} {'type': 'loss', 'content': 0.17332887649536133, 'timestamp': '2025-10-01 04:21:45.742370', 'step': 7336, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:45.797237', 'step': 7336, 'epoch': 1} {'type': 'loss', 'content': 0.20561763644218445, 'timestamp': '2025-10-01 04:21:45.799538', 'step': 7337, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:45.832912', 'step': 7337, 'epoch': 1} {'type': 'loss', 'content': 0.11323460191488266, 'timestamp': '2025-10-01 04:21:45.835750', 'step': 7338, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:21:45.874124', 'step': 7338, 'epoch': 1} {'type': 'loss', 'content': 0.16509433090686798, 'timestamp': '2025-10-01 04:21:45.877855', 'step': 7339, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:45.911128', 'step': 7339, 'epoch': 1} {'type': 'loss', 'content': 0.15617488324642181, 'timestamp': '2025-10-01 04:21:45.934852', 'step': 7340, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:45.966461', 'step': 7340, 'epoch': 1} {'type': 'loss', 'content': 0.1906566470861435, 'timestamp': '2025-10-01 04:21:45.968534', 'step': 7341, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:46.008531', 'step': 7341, 'epoch': 1} {'type': 'loss', 'content': 0.16841335594654083, 'timestamp': '2025-10-01 04:21:46.010964', 'step': 7342, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:46.042452', 'step': 7342, 'epoch': 1} {'type': 'loss', 'content': 0.1299809366464615, 'timestamp': '2025-10-01 04:21:46.044881', 'step': 7343, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:46.076458', 'step': 7343, 'epoch': 1} {'type': 'loss', 'content': 0.2623736262321472, 'timestamp': '2025-10-01 04:21:46.101063', 'step': 7344, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:46.136472', 'step': 7344, 'epoch': 1} {'type': 'loss', 'content': 0.08876238763332367, 'timestamp': '2025-10-01 04:21:46.138644', 'step': 7345, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:46.175881', 'step': 7345, 'epoch': 1} {'type': 'loss', 'content': 0.12611640989780426, 'timestamp': '2025-10-01 04:21:46.178455', 'step': 7346, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:46.217645', 'step': 7346, 'epoch': 1} {'type': 'loss', 'content': 0.11880571395158768, 'timestamp': '2025-10-01 04:21:46.219873', 'step': 7347, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:46.252368', 'step': 7347, 'epoch': 1} {'type': 'loss', 'content': 0.12332847714424133, 'timestamp': '2025-10-01 04:21:46.276408', 'step': 7348, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:46.309849', 'step': 7348, 'epoch': 1} {'type': 'loss', 'content': 0.07884740829467773, 'timestamp': '2025-10-01 04:21:46.312182', 'step': 7349, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:46.344265', 'step': 7349, 'epoch': 1} {'type': 'loss', 'content': 0.09615771472454071, 'timestamp': '2025-10-01 04:21:46.346657', 'step': 7350, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:46.378619', 'step': 7350, 'epoch': 1} {'type': 'loss', 'content': 0.11381112039089203, 'timestamp': '2025-10-01 04:21:46.381381', 'step': 7351, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:46.414360', 'step': 7351, 'epoch': 1} {'type': 'loss', 'content': 0.16524909436702728, 'timestamp': '2025-10-01 04:21:46.438026', 'step': 7352, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:46.475313', 'step': 7352, 'epoch': 1} {'type': 'loss', 'content': 0.1570814996957779, 'timestamp': '2025-10-01 04:21:46.478025', 'step': 7353, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:46.518602', 'step': 7353, 'epoch': 1} {'type': 'loss', 'content': 0.17914101481437683, 'timestamp': '2025-10-01 04:21:46.522097', 'step': 7354, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:46.557369', 'step': 7354, 'epoch': 1} {'type': 'loss', 'content': 0.21124359965324402, 'timestamp': '2025-10-01 04:21:46.559756', 'step': 7355, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:21:46.595050', 'step': 7355, 'epoch': 1} {'type': 'loss', 'content': 0.09559397399425507, 'timestamp': '2025-10-01 04:21:46.618677', 'step': 7356, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:46.666867', 'step': 7356, 'epoch': 1} {'type': 'loss', 'content': 0.22113573551177979, 'timestamp': '2025-10-01 04:21:46.669024', 'step': 7357, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:21:46.700592', 'step': 7357, 'epoch': 1} {'type': 'loss', 'content': 0.08922301232814789, 'timestamp': '2025-10-01 04:21:46.702773', 'step': 7358, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:46.736583', 'step': 7358, 'epoch': 1} {'type': 'loss', 'content': 0.16415199637413025, 'timestamp': '2025-10-01 04:21:46.750121', 'step': 7359, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:46.782797', 'step': 7359, 'epoch': 1} {'type': 'loss', 'content': 0.13275499641895294, 'timestamp': '2025-10-01 04:21:46.806575', 'step': 7360, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:46.846002', 'step': 7360, 'epoch': 1} {'type': 'loss', 'content': 0.18490654230117798, 'timestamp': '2025-10-01 04:21:46.849240', 'step': 7361, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:46.880561', 'step': 7361, 'epoch': 1} {'type': 'loss', 'content': 0.11263670772314072, 'timestamp': '2025-10-01 04:21:46.883112', 'step': 7362, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:46.918300', 'step': 7362, 'epoch': 1} {'type': 'loss', 'content': 0.08268825709819794, 'timestamp': '2025-10-01 04:21:46.920429', 'step': 7363, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:46.953514', 'step': 7363, 'epoch': 1} {'type': 'loss', 'content': 0.22191248834133148, 'timestamp': '2025-10-01 04:21:46.977212', 'step': 7364, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:47.022332', 'step': 7364, 'epoch': 1} {'type': 'loss', 'content': 0.22532640397548676, 'timestamp': '2025-10-01 04:21:47.039846', 'step': 7365, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:47.073003', 'step': 7365, 'epoch': 1} {'type': 'loss', 'content': 0.07216255366802216, 'timestamp': '2025-10-01 04:21:47.075387', 'step': 7366, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:47.113281', 'step': 7366, 'epoch': 1} {'type': 'loss', 'content': 0.12143857777118683, 'timestamp': '2025-10-01 04:21:47.115716', 'step': 7367, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:47.156572', 'step': 7367, 'epoch': 1} {'type': 'loss', 'content': 0.20116586983203888, 'timestamp': '2025-10-01 04:21:47.180186', 'step': 7368, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:47.211577', 'step': 7368, 'epoch': 1} {'type': 'loss', 'content': 0.15982043743133545, 'timestamp': '2025-10-01 04:21:47.213381', 'step': 7369, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:47.247910', 'step': 7369, 'epoch': 1} {'type': 'loss', 'content': 0.1238301619887352, 'timestamp': '2025-10-01 04:21:47.249927', 'step': 7370, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:21:47.282476', 'step': 7370, 'epoch': 1} {'type': 'loss', 'content': 0.11579367518424988, 'timestamp': '2025-10-01 04:21:47.286778', 'step': 7371, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:47.322728', 'step': 7371, 'epoch': 1} {'type': 'loss', 'content': 0.1425945907831192, 'timestamp': '2025-10-01 04:21:47.346799', 'step': 7372, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:47.379316', 'step': 7372, 'epoch': 1} {'type': 'loss', 'content': 0.1688992977142334, 'timestamp': '2025-10-01 04:21:47.381454', 'step': 7373, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:21:47.413525', 'step': 7373, 'epoch': 1} {'type': 'loss', 'content': 0.14112938940525055, 'timestamp': '2025-10-01 04:21:47.416249', 'step': 7374, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:47.448144', 'step': 7374, 'epoch': 1} {'type': 'loss', 'content': 0.13907945156097412, 'timestamp': '2025-10-01 04:21:47.450601', 'step': 7375, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:47.488804', 'step': 7375, 'epoch': 1} {'type': 'loss', 'content': 0.22473615407943726, 'timestamp': '2025-10-01 04:21:47.512365', 'step': 7376, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:47.544975', 'step': 7376, 'epoch': 1} {'type': 'loss', 'content': 0.186827152967453, 'timestamp': '2025-10-01 04:21:47.547188', 'step': 7377, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:47.580220', 'step': 7377, 'epoch': 1} {'type': 'loss', 'content': 0.1291058361530304, 'timestamp': '2025-10-01 04:21:47.582304', 'step': 7378, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:47.612911', 'step': 7378, 'epoch': 1} {'type': 'loss', 'content': 0.10767822712659836, 'timestamp': '2025-10-01 04:21:47.620352', 'step': 7379, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:47.657986', 'step': 7379, 'epoch': 1} {'type': 'loss', 'content': 0.14140982925891876, 'timestamp': '2025-10-01 04:21:47.681648', 'step': 7380, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:47.723597', 'step': 7380, 'epoch': 1} {'type': 'loss', 'content': 0.21149176359176636, 'timestamp': '2025-10-01 04:21:47.725932', 'step': 7381, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:47.763387', 'step': 7381, 'epoch': 1} {'type': 'loss', 'content': 0.19241581857204437, 'timestamp': '2025-10-01 04:21:47.765552', 'step': 7382, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:47.798802', 'step': 7382, 'epoch': 1} {'type': 'loss', 'content': 0.14653266966342926, 'timestamp': '2025-10-01 04:21:47.804440', 'step': 7383, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:47.844318', 'step': 7383, 'epoch': 1} {'type': 'loss', 'content': 0.13454635441303253, 'timestamp': '2025-10-01 04:21:47.869483', 'step': 7384, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:47.904678', 'step': 7384, 'epoch': 1} {'type': 'loss', 'content': 0.22722162306308746, 'timestamp': '2025-10-01 04:21:47.907414', 'step': 7385, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:47.945911', 'step': 7385, 'epoch': 1} {'type': 'loss', 'content': 0.12652620673179626, 'timestamp': '2025-10-01 04:21:47.948053', 'step': 7386, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:47.980969', 'step': 7386, 'epoch': 1} {'type': 'loss', 'content': 0.08577833324670792, 'timestamp': '2025-10-01 04:21:47.983139', 'step': 7387, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:48.019724', 'step': 7387, 'epoch': 1} {'type': 'loss', 'content': 0.11735175549983978, 'timestamp': '2025-10-01 04:21:48.043235', 'step': 7388, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:48.084159', 'step': 7388, 'epoch': 1} {'type': 'loss', 'content': 0.20547662675380707, 'timestamp': '2025-10-01 04:21:48.086364', 'step': 7389, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:48.123744', 'step': 7389, 'epoch': 1} {'type': 'loss', 'content': 0.15196837484836578, 'timestamp': '2025-10-01 04:21:48.126346', 'step': 7390, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:48.161412', 'step': 7390, 'epoch': 1} {'type': 'loss', 'content': 0.24811851978302002, 'timestamp': '2025-10-01 04:21:48.163686', 'step': 7391, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:48.195453', 'step': 7391, 'epoch': 1} {'type': 'loss', 'content': 0.14109936356544495, 'timestamp': '2025-10-01 04:21:48.218986', 'step': 7392, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:21:48.250923', 'step': 7392, 'epoch': 1} {'type': 'loss', 'content': 0.19724924862384796, 'timestamp': '2025-10-01 04:21:48.253237', 'step': 7393, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:48.295065', 'step': 7393, 'epoch': 1} {'type': 'loss', 'content': 0.14539384841918945, 'timestamp': '2025-10-01 04:21:48.297590', 'step': 7394, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:48.336237', 'step': 7394, 'epoch': 1} {'type': 'loss', 'content': 0.1388971507549286, 'timestamp': '2025-10-01 04:21:48.338840', 'step': 7395, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:48.372542', 'step': 7395, 'epoch': 1} {'type': 'loss', 'content': 0.11692236363887787, 'timestamp': '2025-10-01 04:21:48.406461', 'step': 7396, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:21:48.446064', 'step': 7396, 'epoch': 1} {'type': 'loss', 'content': 0.11266425251960754, 'timestamp': '2025-10-01 04:21:48.449253', 'step': 7397, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:48.484693', 'step': 7397, 'epoch': 1} {'type': 'loss', 'content': 0.10130497813224792, 'timestamp': '2025-10-01 04:21:48.487550', 'step': 7398, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:48.522138', 'step': 7398, 'epoch': 1} {'type': 'loss', 'content': 0.13045860826969147, 'timestamp': '2025-10-01 04:21:48.524588', 'step': 7399, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:48.556470', 'step': 7399, 'epoch': 1} {'type': 'loss', 'content': 0.1195533350110054, 'timestamp': '2025-10-01 04:21:48.580217', 'step': 7400, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:21:48.612562', 'step': 7400, 'epoch': 1} {'type': 'loss', 'content': 0.219703808426857, 'timestamp': '2025-10-01 04:21:48.614798', 'step': 7401, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:48.647565', 'step': 7401, 'epoch': 1} {'type': 'loss', 'content': 0.1521042287349701, 'timestamp': '2025-10-01 04:21:48.649971', 'step': 7402, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:48.681806', 'step': 7402, 'epoch': 1} {'type': 'loss', 'content': 0.13720838725566864, 'timestamp': '2025-10-01 04:21:48.684018', 'step': 7403, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:48.727873', 'step': 7403, 'epoch': 1} {'type': 'loss', 'content': 0.1757577806711197, 'timestamp': '2025-10-01 04:21:48.753082', 'step': 7404, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:21:48.800846', 'step': 7404, 'epoch': 1} {'type': 'loss', 'content': 0.10469774901866913, 'timestamp': '2025-10-01 04:21:48.803483', 'step': 7405, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:48.844537', 'step': 7405, 'epoch': 1} {'type': 'loss', 'content': 0.1313985139131546, 'timestamp': '2025-10-01 04:21:48.846877', 'step': 7406, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:48.879748', 'step': 7406, 'epoch': 1} {'type': 'loss', 'content': 0.20081719756126404, 'timestamp': '2025-10-01 04:21:48.884241', 'step': 7407, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:48.919437', 'step': 7407, 'epoch': 1} {'type': 'loss', 'content': 0.30252590775489807, 'timestamp': '2025-10-01 04:21:48.943639', 'step': 7408, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:48.975261', 'step': 7408, 'epoch': 1} {'type': 'loss', 'content': 0.22681599855422974, 'timestamp': '2025-10-01 04:21:48.979479', 'step': 7409, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:49.015544', 'step': 7409, 'epoch': 1} {'type': 'loss', 'content': 0.10939567536115646, 'timestamp': '2025-10-01 04:21:49.018407', 'step': 7410, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:49.049382', 'step': 7410, 'epoch': 1} {'type': 'loss', 'content': 0.07460926473140717, 'timestamp': '2025-10-01 04:21:49.052125', 'step': 7411, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:49.088026', 'step': 7411, 'epoch': 1} {'type': 'loss', 'content': 0.11736282706260681, 'timestamp': '2025-10-01 04:21:49.111977', 'step': 7412, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:21:49.146450', 'step': 7412, 'epoch': 1} {'type': 'loss', 'content': 0.18362964689731598, 'timestamp': '2025-10-01 04:21:49.154121', 'step': 7413, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:49.188348', 'step': 7413, 'epoch': 1} {'type': 'loss', 'content': 0.05374779924750328, 'timestamp': '2025-10-01 04:21:49.194928', 'step': 7414, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:49.229989', 'step': 7414, 'epoch': 1} {'type': 'loss', 'content': 0.06279197335243225, 'timestamp': '2025-10-01 04:21:49.236858', 'step': 7415, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:49.273536', 'step': 7415, 'epoch': 1} {'type': 'loss', 'content': 0.11385884881019592, 'timestamp': '2025-10-01 04:21:49.297463', 'step': 7416, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:21:49.332156', 'step': 7416, 'epoch': 1} {'type': 'loss', 'content': 0.21513214707374573, 'timestamp': '2025-10-01 04:21:49.334245', 'step': 7417, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:49.366536', 'step': 7417, 'epoch': 1} {'type': 'loss', 'content': 0.06791464984416962, 'timestamp': '2025-10-01 04:21:49.368975', 'step': 7418, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:49.400697', 'step': 7418, 'epoch': 1} {'type': 'loss', 'content': 0.15231435000896454, 'timestamp': '2025-10-01 04:21:49.403184', 'step': 7419, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:49.440864', 'step': 7419, 'epoch': 1} {'type': 'loss', 'content': 0.1453525424003601, 'timestamp': '2025-10-01 04:21:49.465106', 'step': 7420, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:21:49.498388', 'step': 7420, 'epoch': 1} {'type': 'loss', 'content': 0.08744237571954727, 'timestamp': '2025-10-01 04:21:49.500513', 'step': 7421, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:49.543388', 'step': 7421, 'epoch': 1} {'type': 'loss', 'content': 0.17187830805778503, 'timestamp': '2025-10-01 04:21:49.548216', 'step': 7422, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:49.580036', 'step': 7422, 'epoch': 1} {'type': 'loss', 'content': 0.16667544841766357, 'timestamp': '2025-10-01 04:21:49.582901', 'step': 7423, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:49.620849', 'step': 7423, 'epoch': 1} {'type': 'loss', 'content': 0.09625907987356186, 'timestamp': '2025-10-01 04:21:49.644646', 'step': 7424, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:49.678186', 'step': 7424, 'epoch': 1} {'type': 'loss', 'content': 0.24303196370601654, 'timestamp': '2025-10-01 04:21:49.681512', 'step': 7425, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:49.724234', 'step': 7425, 'epoch': 1} {'type': 'loss', 'content': 0.10518775880336761, 'timestamp': '2025-10-01 04:21:49.727231', 'step': 7426, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:49.773329', 'step': 7426, 'epoch': 1} {'type': 'loss', 'content': 0.10082045197486877, 'timestamp': '2025-10-01 04:21:49.776083', 'step': 7427, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:49.810101', 'step': 7427, 'epoch': 1} {'type': 'loss', 'content': 0.07759273797273636, 'timestamp': '2025-10-01 04:21:49.838421', 'step': 7428, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:21:49.876223', 'step': 7428, 'epoch': 1} {'type': 'loss', 'content': 0.17527972161769867, 'timestamp': '2025-10-01 04:21:49.878454', 'step': 7429, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:49.928885', 'step': 7429, 'epoch': 1} {'type': 'loss', 'content': 0.11760156601667404, 'timestamp': '2025-10-01 04:21:49.931045', 'step': 7430, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:49.972687', 'step': 7430, 'epoch': 1} {'type': 'loss', 'content': 0.18239501118659973, 'timestamp': '2025-10-01 04:21:49.978407', 'step': 7431, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:21:50.032612', 'step': 7431, 'epoch': 1} {'type': 'loss', 'content': 0.1772499531507492, 'timestamp': '2025-10-01 04:21:50.056448', 'step': 7432, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:50.101240', 'step': 7432, 'epoch': 1} {'type': 'loss', 'content': 0.1418871432542801, 'timestamp': '2025-10-01 04:21:50.103294', 'step': 7433, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:21:50.150930', 'step': 7433, 'epoch': 1} {'type': 'loss', 'content': 0.1624564379453659, 'timestamp': '2025-10-01 04:21:50.153312', 'step': 7434, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:50.189693', 'step': 7434, 'epoch': 1} {'type': 'loss', 'content': 0.14573809504508972, 'timestamp': '2025-10-01 04:21:50.191912', 'step': 7435, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:50.238073', 'step': 7435, 'epoch': 1} {'type': 'loss', 'content': 0.12953849136829376, 'timestamp': '2025-10-01 04:21:50.261961', 'step': 7436, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:50.314211', 'step': 7436, 'epoch': 1} {'type': 'loss', 'content': 0.17196601629257202, 'timestamp': '2025-10-01 04:21:50.316580', 'step': 7437, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:50.352854', 'step': 7437, 'epoch': 1} {'type': 'loss', 'content': 0.07267166674137115, 'timestamp': '2025-10-01 04:21:50.366025', 'step': 7438, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:50.398318', 'step': 7438, 'epoch': 1} {'type': 'loss', 'content': 0.1211174950003624, 'timestamp': '2025-10-01 04:21:50.403004', 'step': 7439, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:50.443726', 'step': 7439, 'epoch': 1} {'type': 'loss', 'content': 0.19985154271125793, 'timestamp': '2025-10-01 04:21:50.468249', 'step': 7440, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:50.507055', 'step': 7440, 'epoch': 1} {'type': 'loss', 'content': 0.15366533398628235, 'timestamp': '2025-10-01 04:21:50.510894', 'step': 7441, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:50.545146', 'step': 7441, 'epoch': 1} {'type': 'loss', 'content': 0.12861429154872894, 'timestamp': '2025-10-01 04:21:50.547815', 'step': 7442, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-10-01 04:21:50.580654', 'step': 7442, 'epoch': 1} {'type': 'loss', 'content': 0.2617689371109009, 'timestamp': '2025-10-01 04:21:50.585294', 'step': 7443, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:50.616789', 'step': 7443, 'epoch': 1} {'type': 'loss', 'content': 0.061394691467285156, 'timestamp': '2025-10-01 04:21:50.640507', 'step': 7444, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:50.677752', 'step': 7444, 'epoch': 1} {'type': 'loss', 'content': 0.1332671195268631, 'timestamp': '2025-10-01 04:21:50.680256', 'step': 7445, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:50.716415', 'step': 7445, 'epoch': 1} {'type': 'loss', 'content': 0.12613612413406372, 'timestamp': '2025-10-01 04:21:50.718303', 'step': 7446, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:50.757801', 'step': 7446, 'epoch': 1} {'type': 'loss', 'content': 0.2360333353281021, 'timestamp': '2025-10-01 04:21:50.760367', 'step': 7447, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:50.791940', 'step': 7447, 'epoch': 1} {'type': 'loss', 'content': 0.1552291363477707, 'timestamp': '2025-10-01 04:21:50.815739', 'step': 7448, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:21:50.854972', 'step': 7448, 'epoch': 1} {'type': 'loss', 'content': 0.10876324772834778, 'timestamp': '2025-10-01 04:21:50.857086', 'step': 7449, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:50.888289', 'step': 7449, 'epoch': 1} {'type': 'loss', 'content': 0.13962262868881226, 'timestamp': '2025-10-01 04:21:50.890770', 'step': 7450, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:50.930473', 'step': 7450, 'epoch': 1} {'type': 'loss', 'content': 0.165609672665596, 'timestamp': '2025-10-01 04:21:50.933618', 'step': 7451, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:50.965328', 'step': 7451, 'epoch': 1} {'type': 'loss', 'content': 0.10108764469623566, 'timestamp': '2025-10-01 04:21:50.989244', 'step': 7452, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:21:51.021806', 'step': 7452, 'epoch': 1} {'type': 'loss', 'content': 0.09677517414093018, 'timestamp': '2025-10-01 04:21:51.023889', 'step': 7453, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:21:51.058863', 'step': 7453, 'epoch': 1} {'type': 'loss', 'content': 0.05585024878382683, 'timestamp': '2025-10-01 04:21:51.061213', 'step': 7454, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:21:51.096122', 'step': 7454, 'epoch': 1} {'type': 'loss', 'content': 0.35008689761161804, 'timestamp': '2025-10-01 04:21:51.100221', 'step': 7455, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:21:51.135147', 'step': 7455, 'epoch': 1} {'type': 'loss', 'content': 0.14364497363567352, 'timestamp': '2025-10-01 04:21:51.158886', 'step': 7456, 'epoch': 1} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 949202279808}], 'timestamp': '2025-10-01 04:22:02.894449', 'step': 7456, 'epoch': 1} {'type': 'pplx', 'content': 9090.192893244199, 'timestamp': '2025-10-01 04:22:02.897406', 'step': 7456, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:02.929826', 'step': 7456, 'epoch': 1} {'type': 'loss', 'content': 0.1451292484998703, 'timestamp': '2025-10-01 04:22:02.931923', 'step': 7457, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:02.982811', 'step': 7457, 'epoch': 1} {'type': 'loss', 'content': 0.14393840730190277, 'timestamp': '2025-10-01 04:22:02.985043', 'step': 7458, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:03.030602', 'step': 7458, 'epoch': 1} {'type': 'loss', 'content': 0.07462815940380096, 'timestamp': '2025-10-01 04:22:03.032782', 'step': 7459, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [1, 208], 'flops': 1542724875376}, 'timestamp': '2025-10-01 04:22:03.105240', 'step': 7459, 'epoch': 1} {'type': 'loss', 'content': 0.390523761510849, 'timestamp': '2025-10-01 04:22:03.129150', 'step': 7460, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:03.161277', 'step': 7460, 'epoch': 2} {'type': 'loss', 'content': 0.08392694592475891, 'timestamp': '2025-10-01 04:22:03.163657', 'step': 7461, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:03.201222', 'step': 7461, 'epoch': 2} {'type': 'loss', 'content': 0.10864384472370148, 'timestamp': '2025-10-01 04:22:03.203759', 'step': 7462, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:03.245893', 'step': 7462, 'epoch': 2} {'type': 'loss', 'content': 0.13363981246948242, 'timestamp': '2025-10-01 04:22:03.248148', 'step': 7463, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:03.289851', 'step': 7463, 'epoch': 2} {'type': 'loss', 'content': 0.12367842346429825, 'timestamp': '2025-10-01 04:22:03.320693', 'step': 7464, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:03.359477', 'step': 7464, 'epoch': 2} {'type': 'loss', 'content': 0.21378512680530548, 'timestamp': '2025-10-01 04:22:03.361634', 'step': 7465, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:03.405298', 'step': 7465, 'epoch': 2} {'type': 'loss', 'content': 0.08186061680316925, 'timestamp': '2025-10-01 04:22:03.407654', 'step': 7466, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:03.448231', 'step': 7466, 'epoch': 2} {'type': 'loss', 'content': 0.180683434009552, 'timestamp': '2025-10-01 04:22:03.450920', 'step': 7467, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:03.490723', 'step': 7467, 'epoch': 2} {'type': 'loss', 'content': 0.07323640584945679, 'timestamp': '2025-10-01 04:22:03.514609', 'step': 7468, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:03.547629', 'step': 7468, 'epoch': 2} {'type': 'loss', 'content': 0.09554076939821243, 'timestamp': '2025-10-01 04:22:03.549800', 'step': 7469, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:03.589366', 'step': 7469, 'epoch': 2} {'type': 'loss', 'content': 0.13389086723327637, 'timestamp': '2025-10-01 04:22:03.591757', 'step': 7470, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:03.632360', 'step': 7470, 'epoch': 2} {'type': 'loss', 'content': 0.09154783934354782, 'timestamp': '2025-10-01 04:22:03.634501', 'step': 7471, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:03.674351', 'step': 7471, 'epoch': 2} {'type': 'loss', 'content': 0.15887384116649628, 'timestamp': '2025-10-01 04:22:03.698145', 'step': 7472, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:03.729804', 'step': 7472, 'epoch': 2} {'type': 'loss', 'content': 0.10044229030609131, 'timestamp': '2025-10-01 04:22:03.731908', 'step': 7473, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:03.764200', 'step': 7473, 'epoch': 2} {'type': 'loss', 'content': 0.09122386574745178, 'timestamp': '2025-10-01 04:22:03.766539', 'step': 7474, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:03.803864', 'step': 7474, 'epoch': 2} {'type': 'loss', 'content': 0.11108077317476273, 'timestamp': '2025-10-01 04:22:03.806162', 'step': 7475, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:03.837012', 'step': 7475, 'epoch': 2} {'type': 'loss', 'content': 0.17443574965000153, 'timestamp': '2025-10-01 04:22:03.861025', 'step': 7476, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:03.904199', 'step': 7476, 'epoch': 2} {'type': 'loss', 'content': 0.061599936336278915, 'timestamp': '2025-10-01 04:22:03.906438', 'step': 7477, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:03.945865', 'step': 7477, 'epoch': 2} {'type': 'loss', 'content': 0.15153445303440094, 'timestamp': '2025-10-01 04:22:03.947912', 'step': 7478, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:22:03.983215', 'step': 7478, 'epoch': 2} {'type': 'loss', 'content': 0.20962518453598022, 'timestamp': '2025-10-01 04:22:03.985624', 'step': 7479, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:04.019932', 'step': 7479, 'epoch': 2} {'type': 'loss', 'content': 0.05859464779496193, 'timestamp': '2025-10-01 04:22:04.044051', 'step': 7480, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:04.076063', 'step': 7480, 'epoch': 2} {'type': 'loss', 'content': 0.0683269202709198, 'timestamp': '2025-10-01 04:22:04.078093', 'step': 7481, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:04.119907', 'step': 7481, 'epoch': 2} {'type': 'loss', 'content': 0.13572217524051666, 'timestamp': '2025-10-01 04:22:04.122481', 'step': 7482, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:04.155969', 'step': 7482, 'epoch': 2} {'type': 'loss', 'content': 0.07068349421024323, 'timestamp': '2025-10-01 04:22:04.158183', 'step': 7483, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:04.191171', 'step': 7483, 'epoch': 2} {'type': 'loss', 'content': 0.1412963569164276, 'timestamp': '2025-10-01 04:22:04.214830', 'step': 7484, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:04.263628', 'step': 7484, 'epoch': 2} {'type': 'loss', 'content': 0.056761931627988815, 'timestamp': '2025-10-01 04:22:04.265893', 'step': 7485, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:22:04.304324', 'step': 7485, 'epoch': 2} {'type': 'loss', 'content': 0.13392457365989685, 'timestamp': '2025-10-01 04:22:04.307236', 'step': 7486, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:04.342107', 'step': 7486, 'epoch': 2} {'type': 'loss', 'content': 0.19780923426151276, 'timestamp': '2025-10-01 04:22:04.344347', 'step': 7487, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:04.378846', 'step': 7487, 'epoch': 2} {'type': 'loss', 'content': 0.10106268525123596, 'timestamp': '2025-10-01 04:22:04.402396', 'step': 7488, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:04.437136', 'step': 7488, 'epoch': 2} {'type': 'loss', 'content': 0.10639949142932892, 'timestamp': '2025-10-01 04:22:04.439230', 'step': 7489, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:04.481358', 'step': 7489, 'epoch': 2} {'type': 'loss', 'content': 0.07400000095367432, 'timestamp': '2025-10-01 04:22:04.483815', 'step': 7490, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:04.525816', 'step': 7490, 'epoch': 2} {'type': 'loss', 'content': 0.14311832189559937, 'timestamp': '2025-10-01 04:22:04.527979', 'step': 7491, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:04.560948', 'step': 7491, 'epoch': 2} {'type': 'loss', 'content': 0.0825294703245163, 'timestamp': '2025-10-01 04:22:04.585261', 'step': 7492, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:04.618254', 'step': 7492, 'epoch': 2} {'type': 'loss', 'content': 0.1957208812236786, 'timestamp': '2025-10-01 04:22:04.620338', 'step': 7493, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:04.654420', 'step': 7493, 'epoch': 2} {'type': 'loss', 'content': 0.1482170969247818, 'timestamp': '2025-10-01 04:22:04.656589', 'step': 7494, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:04.688516', 'step': 7494, 'epoch': 2} {'type': 'loss', 'content': 0.17451493442058563, 'timestamp': '2025-10-01 04:22:04.690859', 'step': 7495, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:04.720967', 'step': 7495, 'epoch': 2} {'type': 'loss', 'content': 0.0593416690826416, 'timestamp': '2025-10-01 04:22:04.744425', 'step': 7496, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:04.781213', 'step': 7496, 'epoch': 2} {'type': 'loss', 'content': 0.12356624752283096, 'timestamp': '2025-10-01 04:22:04.783361', 'step': 7497, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:04.815176', 'step': 7497, 'epoch': 2} {'type': 'loss', 'content': 0.08759988844394684, 'timestamp': '2025-10-01 04:22:04.818069', 'step': 7498, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:04.848850', 'step': 7498, 'epoch': 2} {'type': 'loss', 'content': 0.17444400489330292, 'timestamp': '2025-10-01 04:22:04.851290', 'step': 7499, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:04.886570', 'step': 7499, 'epoch': 2} {'type': 'loss', 'content': 0.13887082040309906, 'timestamp': '2025-10-01 04:22:04.910359', 'step': 7500, 'epoch': 2} {'type': 'info', 'content': 'Checkpoint saved at step 7500', 'timestamp': '2025-10-01 04:22:10.212231', 'step': 7500, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:10.257386', 'step': 7500, 'epoch': 2} {'type': 'loss', 'content': 0.0951651856303215, 'timestamp': '2025-10-01 04:22:10.259481', 'step': 7501, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:10.291110', 'step': 7501, 'epoch': 2} {'type': 'loss', 'content': 0.14692877233028412, 'timestamp': '2025-10-01 04:22:10.293534', 'step': 7502, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:10.325626', 'step': 7502, 'epoch': 2} {'type': 'loss', 'content': 0.13832427561283112, 'timestamp': '2025-10-01 04:22:10.327753', 'step': 7503, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:10.362659', 'step': 7503, 'epoch': 2} {'type': 'loss', 'content': 0.16758541762828827, 'timestamp': '2025-10-01 04:22:10.386303', 'step': 7504, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:10.419247', 'step': 7504, 'epoch': 2} {'type': 'loss', 'content': 0.16639411449432373, 'timestamp': '2025-10-01 04:22:10.421384', 'step': 7505, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:10.452773', 'step': 7505, 'epoch': 2} {'type': 'loss', 'content': 0.1264060139656067, 'timestamp': '2025-10-01 04:22:10.454857', 'step': 7506, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:22:10.496247', 'step': 7506, 'epoch': 2} {'type': 'loss', 'content': 0.13844136893749237, 'timestamp': '2025-10-01 04:22:10.498538', 'step': 7507, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:10.534852', 'step': 7507, 'epoch': 2} {'type': 'loss', 'content': 0.1764095574617386, 'timestamp': '2025-10-01 04:22:10.558934', 'step': 7508, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:10.591174', 'step': 7508, 'epoch': 2} {'type': 'loss', 'content': 0.16991589963436127, 'timestamp': '2025-10-01 04:22:10.594772', 'step': 7509, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:10.627517', 'step': 7509, 'epoch': 2} {'type': 'loss', 'content': 0.10005535185337067, 'timestamp': '2025-10-01 04:22:10.629749', 'step': 7510, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:10.661523', 'step': 7510, 'epoch': 2} {'type': 'loss', 'content': 0.146151602268219, 'timestamp': '2025-10-01 04:22:10.664451', 'step': 7511, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:10.695800', 'step': 7511, 'epoch': 2} {'type': 'loss', 'content': 0.20930515229701996, 'timestamp': '2025-10-01 04:22:10.719569', 'step': 7512, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:10.753025', 'step': 7512, 'epoch': 2} {'type': 'loss', 'content': 0.10758494585752487, 'timestamp': '2025-10-01 04:22:10.755044', 'step': 7513, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:22:10.787829', 'step': 7513, 'epoch': 2} {'type': 'loss', 'content': 0.10526614636182785, 'timestamp': '2025-10-01 04:22:10.791260', 'step': 7514, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:22:10.823617', 'step': 7514, 'epoch': 2} {'type': 'loss', 'content': 0.13187111914157867, 'timestamp': '2025-10-01 04:22:10.826238', 'step': 7515, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:22:10.858622', 'step': 7515, 'epoch': 2} {'type': 'loss', 'content': 0.08968613296747208, 'timestamp': '2025-10-01 04:22:10.882221', 'step': 7516, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:10.914023', 'step': 7516, 'epoch': 2} {'type': 'loss', 'content': 0.11299891769886017, 'timestamp': '2025-10-01 04:22:10.916434', 'step': 7517, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:10.949622', 'step': 7517, 'epoch': 2} {'type': 'loss', 'content': 0.15379735827445984, 'timestamp': '2025-10-01 04:22:10.951866', 'step': 7518, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:10.983374', 'step': 7518, 'epoch': 2} {'type': 'loss', 'content': 0.1530008167028427, 'timestamp': '2025-10-01 04:22:10.985950', 'step': 7519, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:22:11.017472', 'step': 7519, 'epoch': 2} {'type': 'loss', 'content': 0.17388121783733368, 'timestamp': '2025-10-01 04:22:11.041161', 'step': 7520, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:11.072230', 'step': 7520, 'epoch': 2} {'type': 'loss', 'content': 0.13597887754440308, 'timestamp': '2025-10-01 04:22:11.074480', 'step': 7521, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:11.105917', 'step': 7521, 'epoch': 2} {'type': 'loss', 'content': 0.09477666020393372, 'timestamp': '2025-10-01 04:22:11.107916', 'step': 7522, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:11.138456', 'step': 7522, 'epoch': 2} {'type': 'loss', 'content': 0.21863333880901337, 'timestamp': '2025-10-01 04:22:11.140553', 'step': 7523, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:11.176792', 'step': 7523, 'epoch': 2} {'type': 'loss', 'content': 0.09003081172704697, 'timestamp': '2025-10-01 04:22:11.200348', 'step': 7524, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:11.231951', 'step': 7524, 'epoch': 2} {'type': 'loss', 'content': 0.07374763488769531, 'timestamp': '2025-10-01 04:22:11.235076', 'step': 7525, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:22:11.265310', 'step': 7525, 'epoch': 2} {'type': 'loss', 'content': 0.1587015986442566, 'timestamp': '2025-10-01 04:22:11.267284', 'step': 7526, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:22:11.299595', 'step': 7526, 'epoch': 2} {'type': 'loss', 'content': 0.054415758699178696, 'timestamp': '2025-10-01 04:22:11.301924', 'step': 7527, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:11.333666', 'step': 7527, 'epoch': 2} {'type': 'loss', 'content': 0.14142067730426788, 'timestamp': '2025-10-01 04:22:11.357118', 'step': 7528, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:11.393683', 'step': 7528, 'epoch': 2} {'type': 'loss', 'content': 0.19632218778133392, 'timestamp': '2025-10-01 04:22:11.395669', 'step': 7529, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:11.433158', 'step': 7529, 'epoch': 2} {'type': 'loss', 'content': 0.1825423240661621, 'timestamp': '2025-10-01 04:22:11.435277', 'step': 7530, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:11.468196', 'step': 7530, 'epoch': 2} {'type': 'loss', 'content': 0.10529369860887527, 'timestamp': '2025-10-01 04:22:11.470259', 'step': 7531, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:11.502498', 'step': 7531, 'epoch': 2} {'type': 'loss', 'content': 0.12356261163949966, 'timestamp': '2025-10-01 04:22:11.526208', 'step': 7532, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:11.560103', 'step': 7532, 'epoch': 2} {'type': 'loss', 'content': 0.15932394564151764, 'timestamp': '2025-10-01 04:22:11.562272', 'step': 7533, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:11.593392', 'step': 7533, 'epoch': 2} {'type': 'loss', 'content': 0.07268449664115906, 'timestamp': '2025-10-01 04:22:11.595665', 'step': 7534, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:11.626298', 'step': 7534, 'epoch': 2} {'type': 'loss', 'content': 0.12407227605581284, 'timestamp': '2025-10-01 04:22:11.628221', 'step': 7535, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:11.659597', 'step': 7535, 'epoch': 2} {'type': 'loss', 'content': 0.22235582768917084, 'timestamp': '2025-10-01 04:22:11.683094', 'step': 7536, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:11.713444', 'step': 7536, 'epoch': 2} {'type': 'loss', 'content': 0.09789907932281494, 'timestamp': '2025-10-01 04:22:11.715517', 'step': 7537, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:11.745833', 'step': 7537, 'epoch': 2} {'type': 'loss', 'content': 0.09360439330339432, 'timestamp': '2025-10-01 04:22:11.747945', 'step': 7538, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:11.778279', 'step': 7538, 'epoch': 2} {'type': 'loss', 'content': 0.11835967749357224, 'timestamp': '2025-10-01 04:22:11.780376', 'step': 7539, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:11.812069', 'step': 7539, 'epoch': 2} {'type': 'loss', 'content': 0.12485092133283615, 'timestamp': '2025-10-01 04:22:11.835992', 'step': 7540, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:22:11.866140', 'step': 7540, 'epoch': 2} {'type': 'loss', 'content': 0.1421549916267395, 'timestamp': '2025-10-01 04:22:11.868364', 'step': 7541, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:11.898536', 'step': 7541, 'epoch': 2} {'type': 'loss', 'content': 0.15034683048725128, 'timestamp': '2025-10-01 04:22:11.900563', 'step': 7542, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:11.939843', 'step': 7542, 'epoch': 2} {'type': 'loss', 'content': 0.13124606013298035, 'timestamp': '2025-10-01 04:22:11.941863', 'step': 7543, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:11.973049', 'step': 7543, 'epoch': 2} {'type': 'loss', 'content': 0.12503664195537567, 'timestamp': '2025-10-01 04:22:11.996478', 'step': 7544, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:12.032209', 'step': 7544, 'epoch': 2} {'type': 'loss', 'content': 0.09026741981506348, 'timestamp': '2025-10-01 04:22:12.034753', 'step': 7545, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:12.067545', 'step': 7545, 'epoch': 2} {'type': 'loss', 'content': 0.09376104176044464, 'timestamp': '2025-10-01 04:22:12.069593', 'step': 7546, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:12.101365', 'step': 7546, 'epoch': 2} {'type': 'loss', 'content': 0.14841307699680328, 'timestamp': '2025-10-01 04:22:12.103473', 'step': 7547, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:12.135041', 'step': 7547, 'epoch': 2} {'type': 'loss', 'content': 0.20848004519939423, 'timestamp': '2025-10-01 04:22:12.158788', 'step': 7548, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:12.189580', 'step': 7548, 'epoch': 2} {'type': 'loss', 'content': 0.08343581110239029, 'timestamp': '2025-10-01 04:22:12.191632', 'step': 7549, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:12.224133', 'step': 7549, 'epoch': 2} {'type': 'loss', 'content': 0.2105219066143036, 'timestamp': '2025-10-01 04:22:12.226212', 'step': 7550, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:12.256557', 'step': 7550, 'epoch': 2} {'type': 'loss', 'content': 0.082609161734581, 'timestamp': '2025-10-01 04:22:12.258581', 'step': 7551, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:12.288762', 'step': 7551, 'epoch': 2} {'type': 'loss', 'content': 0.0954509750008583, 'timestamp': '2025-10-01 04:22:12.312331', 'step': 7552, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:12.348804', 'step': 7552, 'epoch': 2} {'type': 'loss', 'content': 0.1440914273262024, 'timestamp': '2025-10-01 04:22:12.350868', 'step': 7553, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:22:12.381725', 'step': 7553, 'epoch': 2} {'type': 'loss', 'content': 0.099297434091568, 'timestamp': '2025-10-01 04:22:12.384608', 'step': 7554, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:12.423343', 'step': 7554, 'epoch': 2} {'type': 'loss', 'content': 0.09034371376037598, 'timestamp': '2025-10-01 04:22:12.431655', 'step': 7555, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:12.468428', 'step': 7555, 'epoch': 2} {'type': 'loss', 'content': 0.18128558993339539, 'timestamp': '2025-10-01 04:22:12.493053', 'step': 7556, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:12.524639', 'step': 7556, 'epoch': 2} {'type': 'loss', 'content': 0.13377311825752258, 'timestamp': '2025-10-01 04:22:12.526869', 'step': 7557, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:12.559533', 'step': 7557, 'epoch': 2} {'type': 'loss', 'content': 0.14839889109134674, 'timestamp': '2025-10-01 04:22:12.561811', 'step': 7558, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:12.601793', 'step': 7558, 'epoch': 2} {'type': 'loss', 'content': 0.11340638995170593, 'timestamp': '2025-10-01 04:22:12.604159', 'step': 7559, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:12.636297', 'step': 7559, 'epoch': 2} {'type': 'loss', 'content': 0.14007005095481873, 'timestamp': '2025-10-01 04:22:12.660067', 'step': 7560, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:12.691137', 'step': 7560, 'epoch': 2} {'type': 'loss', 'content': 0.18659238517284393, 'timestamp': '2025-10-01 04:22:12.693895', 'step': 7561, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:12.726423', 'step': 7561, 'epoch': 2} {'type': 'loss', 'content': 0.09641942381858826, 'timestamp': '2025-10-01 04:22:12.728672', 'step': 7562, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:12.760743', 'step': 7562, 'epoch': 2} {'type': 'loss', 'content': 0.18926315009593964, 'timestamp': '2025-10-01 04:22:12.763004', 'step': 7563, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:12.796727', 'step': 7563, 'epoch': 2} {'type': 'loss', 'content': 0.18682514131069183, 'timestamp': '2025-10-01 04:22:12.820812', 'step': 7564, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:12.856518', 'step': 7564, 'epoch': 2} {'type': 'loss', 'content': 0.1562798172235489, 'timestamp': '2025-10-01 04:22:12.859193', 'step': 7565, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:12.890884', 'step': 7565, 'epoch': 2} {'type': 'loss', 'content': 0.10999158769845963, 'timestamp': '2025-10-01 04:22:12.893303', 'step': 7566, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:12.924193', 'step': 7566, 'epoch': 2} {'type': 'loss', 'content': 0.14704780280590057, 'timestamp': '2025-10-01 04:22:12.926493', 'step': 7567, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:12.957235', 'step': 7567, 'epoch': 2} {'type': 'loss', 'content': 0.13275888562202454, 'timestamp': '2025-10-01 04:22:12.981147', 'step': 7568, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:22:13.012270', 'step': 7568, 'epoch': 2} {'type': 'loss', 'content': 0.11659158766269684, 'timestamp': '2025-10-01 04:22:13.014381', 'step': 7569, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:13.046183', 'step': 7569, 'epoch': 2} {'type': 'loss', 'content': 0.21454238891601562, 'timestamp': '2025-10-01 04:22:13.048434', 'step': 7570, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:13.080380', 'step': 7570, 'epoch': 2} {'type': 'loss', 'content': 0.10205826908349991, 'timestamp': '2025-10-01 04:22:13.087502', 'step': 7571, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:13.124419', 'step': 7571, 'epoch': 2} {'type': 'loss', 'content': 0.085706926882267, 'timestamp': '2025-10-01 04:22:13.148485', 'step': 7572, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:13.181942', 'step': 7572, 'epoch': 2} {'type': 'loss', 'content': 0.1481042206287384, 'timestamp': '2025-10-01 04:22:13.184280', 'step': 7573, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:13.215106', 'step': 7573, 'epoch': 2} {'type': 'loss', 'content': 0.2506609857082367, 'timestamp': '2025-10-01 04:22:13.217776', 'step': 7574, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:22:13.248137', 'step': 7574, 'epoch': 2} {'type': 'loss', 'content': 0.08877367526292801, 'timestamp': '2025-10-01 04:22:13.250926', 'step': 7575, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:13.282634', 'step': 7575, 'epoch': 2} {'type': 'loss', 'content': 0.2142549604177475, 'timestamp': '2025-10-01 04:22:13.306727', 'step': 7576, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:13.337394', 'step': 7576, 'epoch': 2} {'type': 'loss', 'content': 0.1448608636856079, 'timestamp': '2025-10-01 04:22:13.339749', 'step': 7577, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:13.382179', 'step': 7577, 'epoch': 2} {'type': 'loss', 'content': 0.20112168788909912, 'timestamp': '2025-10-01 04:22:13.384299', 'step': 7578, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:22:13.415574', 'step': 7578, 'epoch': 2} {'type': 'loss', 'content': 0.06752585619688034, 'timestamp': '2025-10-01 04:22:13.426072', 'step': 7579, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:13.457099', 'step': 7579, 'epoch': 2} {'type': 'loss', 'content': 0.21932366490364075, 'timestamp': '2025-10-01 04:22:13.486542', 'step': 7580, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:13.518819', 'step': 7580, 'epoch': 2} {'type': 'loss', 'content': 0.1629224568605423, 'timestamp': '2025-10-01 04:22:13.521454', 'step': 7581, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:22:13.555828', 'step': 7581, 'epoch': 2} {'type': 'loss', 'content': 0.12842515110969543, 'timestamp': '2025-10-01 04:22:13.560412', 'step': 7582, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:22:13.596145', 'step': 7582, 'epoch': 2} {'type': 'loss', 'content': 0.22438186407089233, 'timestamp': '2025-10-01 04:22:13.598698', 'step': 7583, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:13.639275', 'step': 7583, 'epoch': 2} {'type': 'loss', 'content': 0.13495001196861267, 'timestamp': '2025-10-01 04:22:13.662906', 'step': 7584, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:13.694561', 'step': 7584, 'epoch': 2} {'type': 'loss', 'content': 0.08046254515647888, 'timestamp': '2025-10-01 04:22:13.703454', 'step': 7585, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:13.734733', 'step': 7585, 'epoch': 2} {'type': 'loss', 'content': 0.09367559850215912, 'timestamp': '2025-10-01 04:22:13.736734', 'step': 7586, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:13.769034', 'step': 7586, 'epoch': 2} {'type': 'loss', 'content': 0.1526723951101303, 'timestamp': '2025-10-01 04:22:13.771524', 'step': 7587, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:13.805139', 'step': 7587, 'epoch': 2} {'type': 'loss', 'content': 0.3105735182762146, 'timestamp': '2025-10-01 04:22:13.829166', 'step': 7588, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:13.859989', 'step': 7588, 'epoch': 2} {'type': 'loss', 'content': 0.17248505353927612, 'timestamp': '2025-10-01 04:22:13.864053', 'step': 7589, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:13.895887', 'step': 7589, 'epoch': 2} {'type': 'loss', 'content': 0.12911741435527802, 'timestamp': '2025-10-01 04:22:13.929018', 'step': 7590, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:13.975178', 'step': 7590, 'epoch': 2} {'type': 'loss', 'content': 0.09823184460401535, 'timestamp': '2025-10-01 04:22:13.978275', 'step': 7591, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:14.022621', 'step': 7591, 'epoch': 2} {'type': 'loss', 'content': 0.14230579137802124, 'timestamp': '2025-10-01 04:22:14.054297', 'step': 7592, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:14.094522', 'step': 7592, 'epoch': 2} {'type': 'loss', 'content': 0.18924511969089508, 'timestamp': '2025-10-01 04:22:14.129624', 'step': 7593, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:14.172446', 'step': 7593, 'epoch': 2} {'type': 'loss', 'content': 0.18997910618782043, 'timestamp': '2025-10-01 04:22:14.186476', 'step': 7594, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:14.221686', 'step': 7594, 'epoch': 2} {'type': 'loss', 'content': 0.12772288918495178, 'timestamp': '2025-10-01 04:22:14.223615', 'step': 7595, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:14.256468', 'step': 7595, 'epoch': 2} {'type': 'loss', 'content': 0.11649835109710693, 'timestamp': '2025-10-01 04:22:14.288676', 'step': 7596, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:14.336404', 'step': 7596, 'epoch': 2} {'type': 'loss', 'content': 0.13633687794208527, 'timestamp': '2025-10-01 04:22:14.360811', 'step': 7597, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:14.392853', 'step': 7597, 'epoch': 2} {'type': 'loss', 'content': 0.12856632471084595, 'timestamp': '2025-10-01 04:22:14.396632', 'step': 7598, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:14.437286', 'step': 7598, 'epoch': 2} {'type': 'loss', 'content': 0.12772531807422638, 'timestamp': '2025-10-01 04:22:14.445636', 'step': 7599, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:22:14.488353', 'step': 7599, 'epoch': 2} {'type': 'loss', 'content': 0.19135399162769318, 'timestamp': '2025-10-01 04:22:14.527143', 'step': 7600, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:14.565239', 'step': 7600, 'epoch': 2} {'type': 'loss', 'content': 0.16920077800750732, 'timestamp': '2025-10-01 04:22:14.586761', 'step': 7601, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:14.633566', 'step': 7601, 'epoch': 2} {'type': 'loss', 'content': 0.14323638379573822, 'timestamp': '2025-10-01 04:22:14.640036', 'step': 7602, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:14.685343', 'step': 7602, 'epoch': 2} {'type': 'loss', 'content': 0.19379639625549316, 'timestamp': '2025-10-01 04:22:14.695874', 'step': 7603, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:14.735056', 'step': 7603, 'epoch': 2} {'type': 'loss', 'content': 0.14938099682331085, 'timestamp': '2025-10-01 04:22:14.767886', 'step': 7604, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:14.799080', 'step': 7604, 'epoch': 2} {'type': 'loss', 'content': 0.15068396925926208, 'timestamp': '2025-10-01 04:22:14.802167', 'step': 7605, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:14.839977', 'step': 7605, 'epoch': 2} {'type': 'loss', 'content': 0.20475080609321594, 'timestamp': '2025-10-01 04:22:14.856784', 'step': 7606, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:14.890209', 'step': 7606, 'epoch': 2} {'type': 'loss', 'content': 0.1358093023300171, 'timestamp': '2025-10-01 04:22:14.903228', 'step': 7607, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:14.952218', 'step': 7607, 'epoch': 2} {'type': 'loss', 'content': 0.12454573810100555, 'timestamp': '2025-10-01 04:22:14.990134', 'step': 7608, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:22:15.033679', 'step': 7608, 'epoch': 2} {'type': 'loss', 'content': 0.0766504630446434, 'timestamp': '2025-10-01 04:22:15.068758', 'step': 7609, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:15.104422', 'step': 7609, 'epoch': 2} {'type': 'loss', 'content': 0.14146079123020172, 'timestamp': '2025-10-01 04:22:15.126974', 'step': 7610, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:22:15.157599', 'step': 7610, 'epoch': 2} {'type': 'loss', 'content': 0.10407441109418869, 'timestamp': '2025-10-01 04:22:15.160395', 'step': 7611, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:22:15.200495', 'step': 7611, 'epoch': 2} {'type': 'loss', 'content': 0.07204832136631012, 'timestamp': '2025-10-01 04:22:15.231620', 'step': 7612, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:15.277540', 'step': 7612, 'epoch': 2} {'type': 'loss', 'content': 0.14140582084655762, 'timestamp': '2025-10-01 04:22:15.286346', 'step': 7613, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:15.337302', 'step': 7613, 'epoch': 2} {'type': 'loss', 'content': 0.16923153400421143, 'timestamp': '2025-10-01 04:22:15.355002', 'step': 7614, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:22:15.413796', 'step': 7614, 'epoch': 2} {'type': 'loss', 'content': 0.26970717310905457, 'timestamp': '2025-10-01 04:22:15.427659', 'step': 7615, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:15.462014', 'step': 7615, 'epoch': 2} {'type': 'loss', 'content': 0.154342919588089, 'timestamp': '2025-10-01 04:22:15.485426', 'step': 7616, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:15.516424', 'step': 7616, 'epoch': 2} {'type': 'loss', 'content': 0.0804687961935997, 'timestamp': '2025-10-01 04:22:15.518441', 'step': 7617, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:15.548669', 'step': 7617, 'epoch': 2} {'type': 'loss', 'content': 0.16902945935726166, 'timestamp': '2025-10-01 04:22:15.550941', 'step': 7618, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:15.589992', 'step': 7618, 'epoch': 2} {'type': 'loss', 'content': 0.12147514522075653, 'timestamp': '2025-10-01 04:22:15.592085', 'step': 7619, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:15.623803', 'step': 7619, 'epoch': 2} {'type': 'loss', 'content': 0.1263192743062973, 'timestamp': '2025-10-01 04:22:15.647505', 'step': 7620, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:22:15.677843', 'step': 7620, 'epoch': 2} {'type': 'loss', 'content': 0.15215080976486206, 'timestamp': '2025-10-01 04:22:15.680170', 'step': 7621, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:15.710576', 'step': 7621, 'epoch': 2} {'type': 'loss', 'content': 0.2069849669933319, 'timestamp': '2025-10-01 04:22:15.712631', 'step': 7622, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:15.744560', 'step': 7622, 'epoch': 2} {'type': 'loss', 'content': 0.11282195150852203, 'timestamp': '2025-10-01 04:22:15.746742', 'step': 7623, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:15.776775', 'step': 7623, 'epoch': 2} {'type': 'loss', 'content': 0.11236414313316345, 'timestamp': '2025-10-01 04:22:15.800351', 'step': 7624, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:15.830597', 'step': 7624, 'epoch': 2} {'type': 'loss', 'content': 0.15381461381912231, 'timestamp': '2025-10-01 04:22:15.832753', 'step': 7625, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:22:15.864064', 'step': 7625, 'epoch': 2} {'type': 'loss', 'content': 0.10843697190284729, 'timestamp': '2025-10-01 04:22:15.866062', 'step': 7626, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:15.896255', 'step': 7626, 'epoch': 2} {'type': 'loss', 'content': 0.08051422238349915, 'timestamp': '2025-10-01 04:22:15.898372', 'step': 7627, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:15.928673', 'step': 7627, 'epoch': 2} {'type': 'loss', 'content': 0.092287577688694, 'timestamp': '2025-10-01 04:22:15.952745', 'step': 7628, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:15.983022', 'step': 7628, 'epoch': 2} {'type': 'loss', 'content': 0.07541774213314056, 'timestamp': '2025-10-01 04:22:15.985105', 'step': 7629, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:16.016373', 'step': 7629, 'epoch': 2} {'type': 'loss', 'content': 0.11115837842226028, 'timestamp': '2025-10-01 04:22:16.018328', 'step': 7630, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:16.049583', 'step': 7630, 'epoch': 2} {'type': 'loss', 'content': 0.10359381884336472, 'timestamp': '2025-10-01 04:22:16.051882', 'step': 7631, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:16.083256', 'step': 7631, 'epoch': 2} {'type': 'loss', 'content': 0.18509240448474884, 'timestamp': '2025-10-01 04:22:16.107735', 'step': 7632, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:16.151813', 'step': 7632, 'epoch': 2} {'type': 'loss', 'content': 0.23191064596176147, 'timestamp': '2025-10-01 04:22:16.154030', 'step': 7633, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:16.183956', 'step': 7633, 'epoch': 2} {'type': 'loss', 'content': 0.15038831532001495, 'timestamp': '2025-10-01 04:22:16.186158', 'step': 7634, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:16.217479', 'step': 7634, 'epoch': 2} {'type': 'loss', 'content': 0.1713859885931015, 'timestamp': '2025-10-01 04:22:16.219690', 'step': 7635, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:16.253735', 'step': 7635, 'epoch': 2} {'type': 'loss', 'content': 0.21354956924915314, 'timestamp': '2025-10-01 04:22:16.277086', 'step': 7636, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:16.310643', 'step': 7636, 'epoch': 2} {'type': 'loss', 'content': 0.16208906471729279, 'timestamp': '2025-10-01 04:22:16.312654', 'step': 7637, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:16.344531', 'step': 7637, 'epoch': 2} {'type': 'loss', 'content': 0.06507415324449539, 'timestamp': '2025-10-01 04:22:16.346531', 'step': 7638, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:16.382538', 'step': 7638, 'epoch': 2} {'type': 'loss', 'content': 0.2079206258058548, 'timestamp': '2025-10-01 04:22:16.384644', 'step': 7639, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:16.417376', 'step': 7639, 'epoch': 2} {'type': 'loss', 'content': 0.09694979339838028, 'timestamp': '2025-10-01 04:22:16.440828', 'step': 7640, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:22:16.470936', 'step': 7640, 'epoch': 2} {'type': 'loss', 'content': 0.1623341143131256, 'timestamp': '2025-10-01 04:22:16.472898', 'step': 7641, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:16.503242', 'step': 7641, 'epoch': 2} {'type': 'loss', 'content': 0.092910036444664, 'timestamp': '2025-10-01 04:22:16.505355', 'step': 7642, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:16.536387', 'step': 7642, 'epoch': 2} {'type': 'loss', 'content': 0.15887288749217987, 'timestamp': '2025-10-01 04:22:16.538451', 'step': 7643, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:16.569410', 'step': 7643, 'epoch': 2} {'type': 'loss', 'content': 0.08551779389381409, 'timestamp': '2025-10-01 04:22:16.592808', 'step': 7644, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:22:16.624811', 'step': 7644, 'epoch': 2} {'type': 'loss', 'content': 0.15605324506759644, 'timestamp': '2025-10-01 04:22:16.627047', 'step': 7645, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:16.663136', 'step': 7645, 'epoch': 2} {'type': 'loss', 'content': 0.0755205973982811, 'timestamp': '2025-10-01 04:22:16.665529', 'step': 7646, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:16.705372', 'step': 7646, 'epoch': 2} {'type': 'loss', 'content': 0.12307262420654297, 'timestamp': '2025-10-01 04:22:16.707448', 'step': 7647, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:16.738856', 'step': 7647, 'epoch': 2} {'type': 'loss', 'content': 0.14174926280975342, 'timestamp': '2025-10-01 04:22:16.762377', 'step': 7648, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:22:16.795273', 'step': 7648, 'epoch': 2} {'type': 'loss', 'content': 0.1644759178161621, 'timestamp': '2025-10-01 04:22:16.797378', 'step': 7649, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:22:16.828848', 'step': 7649, 'epoch': 2} {'type': 'loss', 'content': 0.18573936820030212, 'timestamp': '2025-10-01 04:22:16.837986', 'step': 7650, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:16.868391', 'step': 7650, 'epoch': 2} {'type': 'loss', 'content': 0.06550206989049911, 'timestamp': '2025-10-01 04:22:16.870359', 'step': 7651, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:16.900796', 'step': 7651, 'epoch': 2} {'type': 'loss', 'content': 0.20857907831668854, 'timestamp': '2025-10-01 04:22:16.924265', 'step': 7652, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:16.955216', 'step': 7652, 'epoch': 2} {'type': 'loss', 'content': 0.1439845860004425, 'timestamp': '2025-10-01 04:22:16.957213', 'step': 7653, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:16.988047', 'step': 7653, 'epoch': 2} {'type': 'loss', 'content': 0.18012318015098572, 'timestamp': '2025-10-01 04:22:16.992601', 'step': 7654, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:22:17.024141', 'step': 7654, 'epoch': 2} {'type': 'loss', 'content': 0.1353040635585785, 'timestamp': '2025-10-01 04:22:17.026458', 'step': 7655, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:17.058070', 'step': 7655, 'epoch': 2} {'type': 'loss', 'content': 0.11807951331138611, 'timestamp': '2025-10-01 04:22:17.081550', 'step': 7656, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:17.112244', 'step': 7656, 'epoch': 2} {'type': 'loss', 'content': 0.1345534473657608, 'timestamp': '2025-10-01 04:22:17.114107', 'step': 7657, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:17.145635', 'step': 7657, 'epoch': 2} {'type': 'loss', 'content': 0.1686832457780838, 'timestamp': '2025-10-01 04:22:17.147504', 'step': 7658, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:17.177394', 'step': 7658, 'epoch': 2} {'type': 'loss', 'content': 0.08657283335924149, 'timestamp': '2025-10-01 04:22:17.179198', 'step': 7659, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:22:17.209525', 'step': 7659, 'epoch': 2} {'type': 'loss', 'content': 0.14179974794387817, 'timestamp': '2025-10-01 04:22:17.232852', 'step': 7660, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:17.262698', 'step': 7660, 'epoch': 2} {'type': 'loss', 'content': 0.22728800773620605, 'timestamp': '2025-10-01 04:22:17.264494', 'step': 7661, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:17.294561', 'step': 7661, 'epoch': 2} {'type': 'loss', 'content': 0.0929083377122879, 'timestamp': '2025-10-01 04:22:17.296396', 'step': 7662, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:22:17.326889', 'step': 7662, 'epoch': 2} {'type': 'loss', 'content': 0.09824717044830322, 'timestamp': '2025-10-01 04:22:17.329320', 'step': 7663, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:17.360393', 'step': 7663, 'epoch': 2} {'type': 'loss', 'content': 0.11096075922250748, 'timestamp': '2025-10-01 04:22:17.383807', 'step': 7664, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:17.413664', 'step': 7664, 'epoch': 2} {'type': 'loss', 'content': 0.1969786137342453, 'timestamp': '2025-10-01 04:22:17.415742', 'step': 7665, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:17.445506', 'step': 7665, 'epoch': 2} {'type': 'loss', 'content': 0.12013138085603714, 'timestamp': '2025-10-01 04:22:17.447559', 'step': 7666, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:17.477782', 'step': 7666, 'epoch': 2} {'type': 'loss', 'content': 0.17180903255939484, 'timestamp': '2025-10-01 04:22:17.479640', 'step': 7667, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:17.509474', 'step': 7667, 'epoch': 2} {'type': 'loss', 'content': 0.15428200364112854, 'timestamp': '2025-10-01 04:22:17.533001', 'step': 7668, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:17.565686', 'step': 7668, 'epoch': 2} {'type': 'loss', 'content': 0.14916278421878815, 'timestamp': '2025-10-01 04:22:17.567747', 'step': 7669, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:17.597897', 'step': 7669, 'epoch': 2} {'type': 'loss', 'content': 0.27891048789024353, 'timestamp': '2025-10-01 04:22:17.599866', 'step': 7670, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:17.630058', 'step': 7670, 'epoch': 2} {'type': 'loss', 'content': 0.12576131522655487, 'timestamp': '2025-10-01 04:22:17.632134', 'step': 7671, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:17.662456', 'step': 7671, 'epoch': 2} {'type': 'loss', 'content': 0.16592848300933838, 'timestamp': '2025-10-01 04:22:17.686041', 'step': 7672, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:17.718217', 'step': 7672, 'epoch': 2} {'type': 'loss', 'content': 0.1226661279797554, 'timestamp': '2025-10-01 04:22:17.720341', 'step': 7673, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:17.754269', 'step': 7673, 'epoch': 2} {'type': 'loss', 'content': 0.219548299908638, 'timestamp': '2025-10-01 04:22:17.756444', 'step': 7674, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:17.795073', 'step': 7674, 'epoch': 2} {'type': 'loss', 'content': 0.1104348823428154, 'timestamp': '2025-10-01 04:22:17.797657', 'step': 7675, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:17.829855', 'step': 7675, 'epoch': 2} {'type': 'loss', 'content': 0.20438124239444733, 'timestamp': '2025-10-01 04:22:17.853536', 'step': 7676, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:17.885146', 'step': 7676, 'epoch': 2} {'type': 'loss', 'content': 0.16853265464305878, 'timestamp': '2025-10-01 04:22:17.887121', 'step': 7677, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:17.918752', 'step': 7677, 'epoch': 2} {'type': 'loss', 'content': 0.13269226253032684, 'timestamp': '2025-10-01 04:22:17.920824', 'step': 7678, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:17.958264', 'step': 7678, 'epoch': 2} {'type': 'loss', 'content': 0.19817130267620087, 'timestamp': '2025-10-01 04:22:17.960507', 'step': 7679, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:17.992699', 'step': 7679, 'epoch': 2} {'type': 'loss', 'content': 0.1336863487958908, 'timestamp': '2025-10-01 04:22:18.016634', 'step': 7680, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:18.051358', 'step': 7680, 'epoch': 2} {'type': 'loss', 'content': 0.18294262886047363, 'timestamp': '2025-10-01 04:22:18.053276', 'step': 7681, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:18.083798', 'step': 7681, 'epoch': 2} {'type': 'loss', 'content': 0.08349195867776871, 'timestamp': '2025-10-01 04:22:18.085949', 'step': 7682, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:18.120627', 'step': 7682, 'epoch': 2} {'type': 'loss', 'content': 0.20262791216373444, 'timestamp': '2025-10-01 04:22:18.122695', 'step': 7683, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:18.157007', 'step': 7683, 'epoch': 2} {'type': 'loss', 'content': 0.09442790597677231, 'timestamp': '2025-10-01 04:22:18.180473', 'step': 7684, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:18.210626', 'step': 7684, 'epoch': 2} {'type': 'loss', 'content': 0.09460588544607162, 'timestamp': '2025-10-01 04:22:18.212792', 'step': 7685, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:18.243605', 'step': 7685, 'epoch': 2} {'type': 'loss', 'content': 0.1520853191614151, 'timestamp': '2025-10-01 04:22:18.245910', 'step': 7686, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:18.276044', 'step': 7686, 'epoch': 2} {'type': 'loss', 'content': 0.16829726099967957, 'timestamp': '2025-10-01 04:22:18.278062', 'step': 7687, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:18.310246', 'step': 7687, 'epoch': 2} {'type': 'loss', 'content': 0.11704550683498383, 'timestamp': '2025-10-01 04:22:18.333744', 'step': 7688, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:18.364246', 'step': 7688, 'epoch': 2} {'type': 'loss', 'content': 0.19315415620803833, 'timestamp': '2025-10-01 04:22:18.366214', 'step': 7689, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:18.398826', 'step': 7689, 'epoch': 2} {'type': 'loss', 'content': 0.0821169912815094, 'timestamp': '2025-10-01 04:22:18.400854', 'step': 7690, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:18.431066', 'step': 7690, 'epoch': 2} {'type': 'loss', 'content': 0.12500105798244476, 'timestamp': '2025-10-01 04:22:18.433139', 'step': 7691, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:18.465153', 'step': 7691, 'epoch': 2} {'type': 'loss', 'content': 0.21412813663482666, 'timestamp': '2025-10-01 04:22:18.488767', 'step': 7692, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:18.520413', 'step': 7692, 'epoch': 2} {'type': 'loss', 'content': 0.14171230792999268, 'timestamp': '2025-10-01 04:22:18.522623', 'step': 7693, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:18.569540', 'step': 7693, 'epoch': 2} {'type': 'loss', 'content': 0.1280767172574997, 'timestamp': '2025-10-01 04:22:18.571526', 'step': 7694, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:22:18.603136', 'step': 7694, 'epoch': 2} {'type': 'loss', 'content': 0.20092520117759705, 'timestamp': '2025-10-01 04:22:18.605278', 'step': 7695, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:22:18.639177', 'step': 7695, 'epoch': 2} {'type': 'loss', 'content': 0.08054769784212112, 'timestamp': '2025-10-01 04:22:18.662654', 'step': 7696, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:18.693260', 'step': 7696, 'epoch': 2} {'type': 'loss', 'content': 0.16496793925762177, 'timestamp': '2025-10-01 04:22:18.695300', 'step': 7697, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:18.725473', 'step': 7697, 'epoch': 2} {'type': 'loss', 'content': 0.23286165297031403, 'timestamp': '2025-10-01 04:22:18.727323', 'step': 7698, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:18.758279', 'step': 7698, 'epoch': 2} {'type': 'loss', 'content': 0.14640870690345764, 'timestamp': '2025-10-01 04:22:18.760279', 'step': 7699, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:22:18.791238', 'step': 7699, 'epoch': 2} {'type': 'loss', 'content': 0.06248455122113228, 'timestamp': '2025-10-01 04:22:18.814736', 'step': 7700, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:18.845088', 'step': 7700, 'epoch': 2} {'type': 'loss', 'content': 0.19524362683296204, 'timestamp': '2025-10-01 04:22:18.847161', 'step': 7701, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:18.877095', 'step': 7701, 'epoch': 2} {'type': 'loss', 'content': 0.06452625244855881, 'timestamp': '2025-10-01 04:22:18.879175', 'step': 7702, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:18.911201', 'step': 7702, 'epoch': 2} {'type': 'loss', 'content': 0.2688363492488861, 'timestamp': '2025-10-01 04:22:18.913191', 'step': 7703, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:18.943372', 'step': 7703, 'epoch': 2} {'type': 'loss', 'content': 0.1495879590511322, 'timestamp': '2025-10-01 04:22:18.966892', 'step': 7704, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:18.997046', 'step': 7704, 'epoch': 2} {'type': 'loss', 'content': 0.17972782254219055, 'timestamp': '2025-10-01 04:22:18.999037', 'step': 7705, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:19.029504', 'step': 7705, 'epoch': 2} {'type': 'loss', 'content': 0.21914520859718323, 'timestamp': '2025-10-01 04:22:19.031689', 'step': 7706, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:19.064684', 'step': 7706, 'epoch': 2} {'type': 'loss', 'content': 0.060061197727918625, 'timestamp': '2025-10-01 04:22:19.066789', 'step': 7707, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:19.097538', 'step': 7707, 'epoch': 2} {'type': 'loss', 'content': 0.11171175539493561, 'timestamp': '2025-10-01 04:22:19.121109', 'step': 7708, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:22:19.151799', 'step': 7708, 'epoch': 2} {'type': 'loss', 'content': 0.12424546480178833, 'timestamp': '2025-10-01 04:22:19.153610', 'step': 7709, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:19.194681', 'step': 7709, 'epoch': 2} {'type': 'loss', 'content': 0.13796131312847137, 'timestamp': '2025-10-01 04:22:19.196654', 'step': 7710, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:19.227231', 'step': 7710, 'epoch': 2} {'type': 'loss', 'content': 0.09533967822790146, 'timestamp': '2025-10-01 04:22:19.229405', 'step': 7711, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:19.260344', 'step': 7711, 'epoch': 2} {'type': 'loss', 'content': 0.14421020448207855, 'timestamp': '2025-10-01 04:22:19.283826', 'step': 7712, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:19.313792', 'step': 7712, 'epoch': 2} {'type': 'loss', 'content': 0.19625324010849, 'timestamp': '2025-10-01 04:22:19.315844', 'step': 7713, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:19.354277', 'step': 7713, 'epoch': 2} {'type': 'loss', 'content': 0.12468092888593674, 'timestamp': '2025-10-01 04:22:19.356290', 'step': 7714, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:19.386028', 'step': 7714, 'epoch': 2} {'type': 'loss', 'content': 0.09786465018987656, 'timestamp': '2025-10-01 04:22:19.387977', 'step': 7715, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:19.418180', 'step': 7715, 'epoch': 2} {'type': 'loss', 'content': 0.08095791935920715, 'timestamp': '2025-10-01 04:22:19.441694', 'step': 7716, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:19.471484', 'step': 7716, 'epoch': 2} {'type': 'loss', 'content': 0.14846186339855194, 'timestamp': '2025-10-01 04:22:19.473418', 'step': 7717, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:19.504261', 'step': 7717, 'epoch': 2} {'type': 'loss', 'content': 0.10450981557369232, 'timestamp': '2025-10-01 04:22:19.506403', 'step': 7718, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:19.536903', 'step': 7718, 'epoch': 2} {'type': 'loss', 'content': 0.10367907583713531, 'timestamp': '2025-10-01 04:22:19.538945', 'step': 7719, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:19.569261', 'step': 7719, 'epoch': 2} {'type': 'loss', 'content': 0.18671059608459473, 'timestamp': '2025-10-01 04:22:19.592812', 'step': 7720, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:19.622799', 'step': 7720, 'epoch': 2} {'type': 'loss', 'content': 0.131504625082016, 'timestamp': '2025-10-01 04:22:19.624780', 'step': 7721, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:19.654631', 'step': 7721, 'epoch': 2} {'type': 'loss', 'content': 0.0826483964920044, 'timestamp': '2025-10-01 04:22:19.656715', 'step': 7722, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:19.686908', 'step': 7722, 'epoch': 2} {'type': 'loss', 'content': 0.14897489547729492, 'timestamp': '2025-10-01 04:22:19.689867', 'step': 7723, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-10-01 04:22:19.720687', 'step': 7723, 'epoch': 2} {'type': 'loss', 'content': 0.1442456692457199, 'timestamp': '2025-10-01 04:22:19.748607', 'step': 7724, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:19.778968', 'step': 7724, 'epoch': 2} {'type': 'loss', 'content': 0.14701548218727112, 'timestamp': '2025-10-01 04:22:19.781591', 'step': 7725, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:19.813915', 'step': 7725, 'epoch': 2} {'type': 'loss', 'content': 0.09674247354269028, 'timestamp': '2025-10-01 04:22:19.815995', 'step': 7726, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:19.846053', 'step': 7726, 'epoch': 2} {'type': 'loss', 'content': 0.0935075581073761, 'timestamp': '2025-10-01 04:22:19.848054', 'step': 7727, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:19.878370', 'step': 7727, 'epoch': 2} {'type': 'loss', 'content': 0.1783541738986969, 'timestamp': '2025-10-01 04:22:19.901974', 'step': 7728, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:19.932611', 'step': 7728, 'epoch': 2} {'type': 'loss', 'content': 0.10907814651727676, 'timestamp': '2025-10-01 04:22:19.934717', 'step': 7729, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:19.964755', 'step': 7729, 'epoch': 2} {'type': 'loss', 'content': 0.07219184190034866, 'timestamp': '2025-10-01 04:22:19.966776', 'step': 7730, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:22:19.997153', 'step': 7730, 'epoch': 2} {'type': 'loss', 'content': 0.20868350565433502, 'timestamp': '2025-10-01 04:22:19.999542', 'step': 7731, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:20.030258', 'step': 7731, 'epoch': 2} {'type': 'loss', 'content': 0.1878860592842102, 'timestamp': '2025-10-01 04:22:20.053843', 'step': 7732, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:20.092684', 'step': 7732, 'epoch': 2} {'type': 'loss', 'content': 0.09487155079841614, 'timestamp': '2025-10-01 04:22:20.099272', 'step': 7733, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:20.130201', 'step': 7733, 'epoch': 2} {'type': 'loss', 'content': 0.10298816114664078, 'timestamp': '2025-10-01 04:22:20.132660', 'step': 7734, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:20.165678', 'step': 7734, 'epoch': 2} {'type': 'loss', 'content': 0.10436556488275528, 'timestamp': '2025-10-01 04:22:20.167847', 'step': 7735, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:20.201752', 'step': 7735, 'epoch': 2} {'type': 'loss', 'content': 0.08603167533874512, 'timestamp': '2025-10-01 04:22:20.225553', 'step': 7736, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:20.256273', 'step': 7736, 'epoch': 2} {'type': 'loss', 'content': 0.13852301239967346, 'timestamp': '2025-10-01 04:22:20.258495', 'step': 7737, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:20.289630', 'step': 7737, 'epoch': 2} {'type': 'loss', 'content': 0.12723693251609802, 'timestamp': '2025-10-01 04:22:20.291801', 'step': 7738, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:20.323709', 'step': 7738, 'epoch': 2} {'type': 'loss', 'content': 0.12533125281333923, 'timestamp': '2025-10-01 04:22:20.325749', 'step': 7739, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:20.356618', 'step': 7739, 'epoch': 2} {'type': 'loss', 'content': 0.16196341812610626, 'timestamp': '2025-10-01 04:22:20.380066', 'step': 7740, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:20.410110', 'step': 7740, 'epoch': 2} {'type': 'loss', 'content': 0.05420445278286934, 'timestamp': '2025-10-01 04:22:20.412207', 'step': 7741, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:20.443192', 'step': 7741, 'epoch': 2} {'type': 'loss', 'content': 0.11085215210914612, 'timestamp': '2025-10-01 04:22:20.445121', 'step': 7742, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:22:20.474922', 'step': 7742, 'epoch': 2} {'type': 'loss', 'content': 0.08561088144779205, 'timestamp': '2025-10-01 04:22:20.476919', 'step': 7743, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:22:20.506510', 'step': 7743, 'epoch': 2} {'type': 'loss', 'content': 0.06414195895195007, 'timestamp': '2025-10-01 04:22:20.530042', 'step': 7744, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:22:20.561594', 'step': 7744, 'epoch': 2} {'type': 'loss', 'content': 0.158145472407341, 'timestamp': '2025-10-01 04:22:20.563615', 'step': 7745, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:20.594218', 'step': 7745, 'epoch': 2} {'type': 'loss', 'content': 0.22053800523281097, 'timestamp': '2025-10-01 04:22:20.602605', 'step': 7746, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:20.632552', 'step': 7746, 'epoch': 2} {'type': 'loss', 'content': 0.22456300258636475, 'timestamp': '2025-10-01 04:22:20.634610', 'step': 7747, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:20.665801', 'step': 7747, 'epoch': 2} {'type': 'loss', 'content': 0.2542274594306946, 'timestamp': '2025-10-01 04:22:20.689661', 'step': 7748, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:20.719842', 'step': 7748, 'epoch': 2} {'type': 'loss', 'content': 0.1838947981595993, 'timestamp': '2025-10-01 04:22:20.721892', 'step': 7749, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:22:20.752014', 'step': 7749, 'epoch': 2} {'type': 'loss', 'content': 0.12043406069278717, 'timestamp': '2025-10-01 04:22:20.754164', 'step': 7750, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:22:20.784664', 'step': 7750, 'epoch': 2} {'type': 'loss', 'content': 0.0987897664308548, 'timestamp': '2025-10-01 04:22:20.790604', 'step': 7751, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:20.823104', 'step': 7751, 'epoch': 2} {'type': 'loss', 'content': 0.21002300083637238, 'timestamp': '2025-10-01 04:22:20.846960', 'step': 7752, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:22:20.878897', 'step': 7752, 'epoch': 2} {'type': 'loss', 'content': 0.19711141288280487, 'timestamp': '2025-10-01 04:22:20.880883', 'step': 7753, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:20.911036', 'step': 7753, 'epoch': 2} {'type': 'loss', 'content': 0.15981492400169373, 'timestamp': '2025-10-01 04:22:20.913063', 'step': 7754, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:20.945010', 'step': 7754, 'epoch': 2} {'type': 'loss', 'content': 0.149274080991745, 'timestamp': '2025-10-01 04:22:20.953636', 'step': 7755, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:20.984055', 'step': 7755, 'epoch': 2} {'type': 'loss', 'content': 0.08168934285640717, 'timestamp': '2025-10-01 04:22:21.007565', 'step': 7756, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:22:21.037380', 'step': 7756, 'epoch': 2} {'type': 'loss', 'content': 0.1257486492395401, 'timestamp': '2025-10-01 04:22:21.039468', 'step': 7757, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:21.070860', 'step': 7757, 'epoch': 2} {'type': 'loss', 'content': 0.11310536414384842, 'timestamp': '2025-10-01 04:22:21.072678', 'step': 7758, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:21.103697', 'step': 7758, 'epoch': 2} {'type': 'loss', 'content': 0.10029134154319763, 'timestamp': '2025-10-01 04:22:21.105602', 'step': 7759, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:21.137107', 'step': 7759, 'epoch': 2} {'type': 'loss', 'content': 0.15288496017456055, 'timestamp': '2025-10-01 04:22:21.160539', 'step': 7760, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:21.190592', 'step': 7760, 'epoch': 2} {'type': 'loss', 'content': 0.17887964844703674, 'timestamp': '2025-10-01 04:22:21.192607', 'step': 7761, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:22:21.223411', 'step': 7761, 'epoch': 2} {'type': 'loss', 'content': 0.1548730880022049, 'timestamp': '2025-10-01 04:22:21.225761', 'step': 7762, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:21.257182', 'step': 7762, 'epoch': 2} {'type': 'loss', 'content': 0.1302260011434555, 'timestamp': '2025-10-01 04:22:21.259274', 'step': 7763, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:21.290183', 'step': 7763, 'epoch': 2} {'type': 'loss', 'content': 0.1366652250289917, 'timestamp': '2025-10-01 04:22:21.314223', 'step': 7764, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 12814563338304}, 'timestamp': '2025-10-01 04:22:21.351105', 'step': 7764, 'epoch': 2} {'type': 'loss', 'content': 0.11569936573505402, 'timestamp': '2025-10-01 04:22:21.366900', 'step': 7765, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:21.397377', 'step': 7765, 'epoch': 2} {'type': 'loss', 'content': 0.1817784458398819, 'timestamp': '2025-10-01 04:22:21.400114', 'step': 7766, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:21.430303', 'step': 7766, 'epoch': 2} {'type': 'loss', 'content': 0.13764524459838867, 'timestamp': '2025-10-01 04:22:21.432615', 'step': 7767, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:21.463684', 'step': 7767, 'epoch': 2} {'type': 'loss', 'content': 0.11948049813508987, 'timestamp': '2025-10-01 04:22:21.487232', 'step': 7768, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:21.518099', 'step': 7768, 'epoch': 2} {'type': 'loss', 'content': 0.13298001885414124, 'timestamp': '2025-10-01 04:22:21.520185', 'step': 7769, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:22:21.550381', 'step': 7769, 'epoch': 2} {'type': 'loss', 'content': 0.15836860239505768, 'timestamp': '2025-10-01 04:22:21.552432', 'step': 7770, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:21.583123', 'step': 7770, 'epoch': 2} {'type': 'loss', 'content': 0.18306368589401245, 'timestamp': '2025-10-01 04:22:21.585171', 'step': 7771, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:21.615628', 'step': 7771, 'epoch': 2} {'type': 'loss', 'content': 0.08668730407953262, 'timestamp': '2025-10-01 04:22:21.639338', 'step': 7772, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:21.673680', 'step': 7772, 'epoch': 2} {'type': 'loss', 'content': 0.0712243840098381, 'timestamp': '2025-10-01 04:22:21.677304', 'step': 7773, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:21.708184', 'step': 7773, 'epoch': 2} {'type': 'loss', 'content': 0.11554929614067078, 'timestamp': '2025-10-01 04:22:21.710585', 'step': 7774, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:21.744646', 'step': 7774, 'epoch': 2} {'type': 'loss', 'content': 0.12008743733167648, 'timestamp': '2025-10-01 04:22:21.747225', 'step': 7775, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:21.777441', 'step': 7775, 'epoch': 2} {'type': 'loss', 'content': 0.10131629556417465, 'timestamp': '2025-10-01 04:22:21.801563', 'step': 7776, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:21.836183', 'step': 7776, 'epoch': 2} {'type': 'loss', 'content': 0.09902163594961166, 'timestamp': '2025-10-01 04:22:21.838362', 'step': 7777, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:21.870160', 'step': 7777, 'epoch': 2} {'type': 'loss', 'content': 0.10794015973806381, 'timestamp': '2025-10-01 04:22:21.872243', 'step': 7778, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:21.903539', 'step': 7778, 'epoch': 2} {'type': 'loss', 'content': 0.15616722404956818, 'timestamp': '2025-10-01 04:22:21.905761', 'step': 7779, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:21.935593', 'step': 7779, 'epoch': 2} {'type': 'loss', 'content': 0.14889554679393768, 'timestamp': '2025-10-01 04:22:21.962407', 'step': 7780, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:21.998692', 'step': 7780, 'epoch': 2} {'type': 'loss', 'content': 0.22053827345371246, 'timestamp': '2025-10-01 04:22:22.000737', 'step': 7781, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:22.031208', 'step': 7781, 'epoch': 2} {'type': 'loss', 'content': 0.1247621551156044, 'timestamp': '2025-10-01 04:22:22.033448', 'step': 7782, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:22.064864', 'step': 7782, 'epoch': 2} {'type': 'loss', 'content': 0.09134279191493988, 'timestamp': '2025-10-01 04:22:22.066998', 'step': 7783, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:22.097615', 'step': 7783, 'epoch': 2} {'type': 'loss', 'content': 0.0869494080543518, 'timestamp': '2025-10-01 04:22:22.130319', 'step': 7784, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:22:22.160714', 'step': 7784, 'epoch': 2} {'type': 'loss', 'content': 0.10469978302717209, 'timestamp': '2025-10-01 04:22:22.163320', 'step': 7785, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:22.194267', 'step': 7785, 'epoch': 2} {'type': 'loss', 'content': 0.19893327355384827, 'timestamp': '2025-10-01 04:22:22.196584', 'step': 7786, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:22.228172', 'step': 7786, 'epoch': 2} {'type': 'loss', 'content': 0.11656875163316727, 'timestamp': '2025-10-01 04:22:22.230336', 'step': 7787, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:22.266678', 'step': 7787, 'epoch': 2} {'type': 'loss', 'content': 0.1684635877609253, 'timestamp': '2025-10-01 04:22:22.294133', 'step': 7788, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:22.327959', 'step': 7788, 'epoch': 2} {'type': 'loss', 'content': 0.1523762345314026, 'timestamp': '2025-10-01 04:22:22.330081', 'step': 7789, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:22:22.362505', 'step': 7789, 'epoch': 2} {'type': 'loss', 'content': 0.0773099884390831, 'timestamp': '2025-10-01 04:22:22.365021', 'step': 7790, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:22:22.396141', 'step': 7790, 'epoch': 2} {'type': 'loss', 'content': 0.17129558324813843, 'timestamp': '2025-10-01 04:22:22.398762', 'step': 7791, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:22.432275', 'step': 7791, 'epoch': 2} {'type': 'loss', 'content': 0.16207505762577057, 'timestamp': '2025-10-01 04:22:22.456400', 'step': 7792, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:22.486011', 'step': 7792, 'epoch': 2} {'type': 'loss', 'content': 0.09111864864826202, 'timestamp': '2025-10-01 04:22:22.499043', 'step': 7793, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:22.532228', 'step': 7793, 'epoch': 2} {'type': 'loss', 'content': 0.18078553676605225, 'timestamp': '2025-10-01 04:22:22.534651', 'step': 7794, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:22.568107', 'step': 7794, 'epoch': 2} {'type': 'loss', 'content': 0.07888621091842651, 'timestamp': '2025-10-01 04:22:22.572385', 'step': 7795, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:22.605582', 'step': 7795, 'epoch': 2} {'type': 'loss', 'content': 0.13149267435073853, 'timestamp': '2025-10-01 04:22:22.633408', 'step': 7796, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:22.670449', 'step': 7796, 'epoch': 2} {'type': 'loss', 'content': 0.1535596251487732, 'timestamp': '2025-10-01 04:22:22.672548', 'step': 7797, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:22:22.703859', 'step': 7797, 'epoch': 2} {'type': 'loss', 'content': 0.09919071197509766, 'timestamp': '2025-10-01 04:22:22.706305', 'step': 7798, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:22:22.737116', 'step': 7798, 'epoch': 2} {'type': 'loss', 'content': 0.1346098929643631, 'timestamp': '2025-10-01 04:22:22.747389', 'step': 7799, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:22.783971', 'step': 7799, 'epoch': 2} {'type': 'loss', 'content': 0.1351308524608612, 'timestamp': '2025-10-01 04:22:22.807612', 'step': 7800, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:22.841207', 'step': 7800, 'epoch': 2} {'type': 'loss', 'content': 0.1347968429327011, 'timestamp': '2025-10-01 04:22:22.843160', 'step': 7801, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:22.877475', 'step': 7801, 'epoch': 2} {'type': 'loss', 'content': 0.13877461850643158, 'timestamp': '2025-10-01 04:22:22.883307', 'step': 7802, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:22.915012', 'step': 7802, 'epoch': 2} {'type': 'loss', 'content': 0.1213386058807373, 'timestamp': '2025-10-01 04:22:22.919164', 'step': 7803, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:22.950877', 'step': 7803, 'epoch': 2} {'type': 'loss', 'content': 0.12495730817317963, 'timestamp': '2025-10-01 04:22:22.974407', 'step': 7804, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:22:23.005845', 'step': 7804, 'epoch': 2} {'type': 'loss', 'content': 0.1967068463563919, 'timestamp': '2025-10-01 04:22:23.007993', 'step': 7805, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:23.040268', 'step': 7805, 'epoch': 2} {'type': 'loss', 'content': 0.20329608023166656, 'timestamp': '2025-10-01 04:22:23.043061', 'step': 7806, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:23.083161', 'step': 7806, 'epoch': 2} {'type': 'loss', 'content': 0.18076163530349731, 'timestamp': '2025-10-01 04:22:23.085330', 'step': 7807, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:23.116038', 'step': 7807, 'epoch': 2} {'type': 'loss', 'content': 0.09801194816827774, 'timestamp': '2025-10-01 04:22:23.139715', 'step': 7808, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:23.175186', 'step': 7808, 'epoch': 2} {'type': 'loss', 'content': 0.11592338979244232, 'timestamp': '2025-10-01 04:22:23.177467', 'step': 7809, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:23.208250', 'step': 7809, 'epoch': 2} {'type': 'loss', 'content': 0.08514281362295151, 'timestamp': '2025-10-01 04:22:23.210501', 'step': 7810, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:23.241416', 'step': 7810, 'epoch': 2} {'type': 'loss', 'content': 0.1259206235408783, 'timestamp': '2025-10-01 04:22:23.243647', 'step': 7811, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:23.273504', 'step': 7811, 'epoch': 2} {'type': 'loss', 'content': 0.16501447558403015, 'timestamp': '2025-10-01 04:22:23.297036', 'step': 7812, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:22:23.327905', 'step': 7812, 'epoch': 2} {'type': 'loss', 'content': 0.06965754181146622, 'timestamp': '2025-10-01 04:22:23.329894', 'step': 7813, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:23.360413', 'step': 7813, 'epoch': 2} {'type': 'loss', 'content': 0.08031319826841354, 'timestamp': '2025-10-01 04:22:23.362642', 'step': 7814, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:23.395501', 'step': 7814, 'epoch': 2} {'type': 'loss', 'content': 0.0746316909790039, 'timestamp': '2025-10-01 04:22:23.399253', 'step': 7815, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:23.436993', 'step': 7815, 'epoch': 2} {'type': 'loss', 'content': 0.25835177302360535, 'timestamp': '2025-10-01 04:22:23.464988', 'step': 7816, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:22:23.498740', 'step': 7816, 'epoch': 2} {'type': 'loss', 'content': 0.1533391922712326, 'timestamp': '2025-10-01 04:22:23.501497', 'step': 7817, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:23.531858', 'step': 7817, 'epoch': 2} {'type': 'loss', 'content': 0.06311879307031631, 'timestamp': '2025-10-01 04:22:23.534835', 'step': 7818, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:23.565973', 'step': 7818, 'epoch': 2} {'type': 'loss', 'content': 0.0835387334227562, 'timestamp': '2025-10-01 04:22:23.569160', 'step': 7819, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:23.601564', 'step': 7819, 'epoch': 2} {'type': 'loss', 'content': 0.17227965593338013, 'timestamp': '2025-10-01 04:22:23.625232', 'step': 7820, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:23.655621', 'step': 7820, 'epoch': 2} {'type': 'loss', 'content': 0.07666303962469101, 'timestamp': '2025-10-01 04:22:23.657873', 'step': 7821, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:23.687550', 'step': 7821, 'epoch': 2} {'type': 'loss', 'content': 0.0925864577293396, 'timestamp': '2025-10-01 04:22:23.689764', 'step': 7822, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:23.720707', 'step': 7822, 'epoch': 2} {'type': 'loss', 'content': 0.06724600493907928, 'timestamp': '2025-10-01 04:22:23.723083', 'step': 7823, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:23.754617', 'step': 7823, 'epoch': 2} {'type': 'loss', 'content': 0.10042940825223923, 'timestamp': '2025-10-01 04:22:23.778649', 'step': 7824, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:22:23.809685', 'step': 7824, 'epoch': 2} {'type': 'loss', 'content': 0.19821369647979736, 'timestamp': '2025-10-01 04:22:23.813766', 'step': 7825, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:23.847558', 'step': 7825, 'epoch': 2} {'type': 'loss', 'content': 0.1255425661802292, 'timestamp': '2025-10-01 04:22:23.849887', 'step': 7826, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:22:23.883938', 'step': 7826, 'epoch': 2} {'type': 'loss', 'content': 0.2963443994522095, 'timestamp': '2025-10-01 04:22:23.886217', 'step': 7827, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:23.919224', 'step': 7827, 'epoch': 2} {'type': 'loss', 'content': 0.11735618114471436, 'timestamp': '2025-10-01 04:22:23.942853', 'step': 7828, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:23.972989', 'step': 7828, 'epoch': 2} {'type': 'loss', 'content': 0.19722162187099457, 'timestamp': '2025-10-01 04:22:23.975490', 'step': 7829, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:24.009339', 'step': 7829, 'epoch': 2} {'type': 'loss', 'content': 0.05387198552489281, 'timestamp': '2025-10-01 04:22:24.012575', 'step': 7830, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:24.043603', 'step': 7830, 'epoch': 2} {'type': 'loss', 'content': 0.12418025732040405, 'timestamp': '2025-10-01 04:22:24.045768', 'step': 7831, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:24.076915', 'step': 7831, 'epoch': 2} {'type': 'loss', 'content': 0.1814621537923813, 'timestamp': '2025-10-01 04:22:24.100641', 'step': 7832, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:22:24.131610', 'step': 7832, 'epoch': 2} {'type': 'loss', 'content': 0.1071975976228714, 'timestamp': '2025-10-01 04:22:24.133841', 'step': 7833, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:24.165307', 'step': 7833, 'epoch': 2} {'type': 'loss', 'content': 0.12449862062931061, 'timestamp': '2025-10-01 04:22:24.167548', 'step': 7834, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:24.199429', 'step': 7834, 'epoch': 2} {'type': 'loss', 'content': 0.12314490973949432, 'timestamp': '2025-10-01 04:22:24.201671', 'step': 7835, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:24.232861', 'step': 7835, 'epoch': 2} {'type': 'loss', 'content': 0.07587811350822449, 'timestamp': '2025-10-01 04:22:24.256540', 'step': 7836, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:24.287034', 'step': 7836, 'epoch': 2} {'type': 'loss', 'content': 0.1551344245672226, 'timestamp': '2025-10-01 04:22:24.289128', 'step': 7837, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:24.319707', 'step': 7837, 'epoch': 2} {'type': 'loss', 'content': 0.1018790677189827, 'timestamp': '2025-10-01 04:22:24.321887', 'step': 7838, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:24.352562', 'step': 7838, 'epoch': 2} {'type': 'loss', 'content': 0.14448770880699158, 'timestamp': '2025-10-01 04:22:24.354749', 'step': 7839, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:24.385564', 'step': 7839, 'epoch': 2} {'type': 'loss', 'content': 0.24759061634540558, 'timestamp': '2025-10-01 04:22:24.409011', 'step': 7840, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:24.441021', 'step': 7840, 'epoch': 2} {'type': 'loss', 'content': 0.14440079033374786, 'timestamp': '2025-10-01 04:22:24.443237', 'step': 7841, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:24.473377', 'step': 7841, 'epoch': 2} {'type': 'loss', 'content': 0.26380789279937744, 'timestamp': '2025-10-01 04:22:24.475710', 'step': 7842, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:24.517282', 'step': 7842, 'epoch': 2} {'type': 'loss', 'content': 0.05901410058140755, 'timestamp': '2025-10-01 04:22:24.519441', 'step': 7843, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:24.550500', 'step': 7843, 'epoch': 2} {'type': 'loss', 'content': 0.11036067456007004, 'timestamp': '2025-10-01 04:22:24.573938', 'step': 7844, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:24.605669', 'step': 7844, 'epoch': 2} {'type': 'loss', 'content': 0.14312787353992462, 'timestamp': '2025-10-01 04:22:24.607827', 'step': 7845, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:24.641943', 'step': 7845, 'epoch': 2} {'type': 'loss', 'content': 0.08860421925783157, 'timestamp': '2025-10-01 04:22:24.643944', 'step': 7846, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:24.674404', 'step': 7846, 'epoch': 2} {'type': 'loss', 'content': 0.05634889379143715, 'timestamp': '2025-10-01 04:22:24.676778', 'step': 7847, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:22:24.707552', 'step': 7847, 'epoch': 2} {'type': 'loss', 'content': 0.18014582991600037, 'timestamp': '2025-10-01 04:22:24.731240', 'step': 7848, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:24.761689', 'step': 7848, 'epoch': 2} {'type': 'loss', 'content': 0.16381512582302094, 'timestamp': '2025-10-01 04:22:24.763665', 'step': 7849, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:22:24.793790', 'step': 7849, 'epoch': 2} {'type': 'loss', 'content': 0.11984996497631073, 'timestamp': '2025-10-01 04:22:24.795774', 'step': 7850, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:24.828704', 'step': 7850, 'epoch': 2} {'type': 'loss', 'content': 0.15388599038124084, 'timestamp': '2025-10-01 04:22:24.838387', 'step': 7851, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:22:24.870571', 'step': 7851, 'epoch': 2} {'type': 'loss', 'content': 0.14523059129714966, 'timestamp': '2025-10-01 04:22:24.894352', 'step': 7852, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:24.928163', 'step': 7852, 'epoch': 2} {'type': 'loss', 'content': 0.09740683436393738, 'timestamp': '2025-10-01 04:22:24.932057', 'step': 7853, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:24.964097', 'step': 7853, 'epoch': 2} {'type': 'loss', 'content': 0.080708809196949, 'timestamp': '2025-10-01 04:22:24.966406', 'step': 7854, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:22:24.997129', 'step': 7854, 'epoch': 2} {'type': 'loss', 'content': 0.21183772385120392, 'timestamp': '2025-10-01 04:22:24.999408', 'step': 7855, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:25.030597', 'step': 7855, 'epoch': 2} {'type': 'loss', 'content': 0.10654588788747787, 'timestamp': '2025-10-01 04:22:25.054283', 'step': 7856, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:25.084626', 'step': 7856, 'epoch': 2} {'type': 'loss', 'content': 0.10646271705627441, 'timestamp': '2025-10-01 04:22:25.086546', 'step': 7857, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:25.117778', 'step': 7857, 'epoch': 2} {'type': 'loss', 'content': 0.23898054659366608, 'timestamp': '2025-10-01 04:22:25.119920', 'step': 7858, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:25.151023', 'step': 7858, 'epoch': 2} {'type': 'loss', 'content': 0.13462041318416595, 'timestamp': '2025-10-01 04:22:25.164736', 'step': 7859, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:25.203192', 'step': 7859, 'epoch': 2} {'type': 'loss', 'content': 0.12308499962091446, 'timestamp': '2025-10-01 04:22:25.226939', 'step': 7860, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:22:25.257855', 'step': 7860, 'epoch': 2} {'type': 'loss', 'content': 0.17149420082569122, 'timestamp': '2025-10-01 04:22:25.259965', 'step': 7861, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:25.291684', 'step': 7861, 'epoch': 2} {'type': 'loss', 'content': 0.10605920851230621, 'timestamp': '2025-10-01 04:22:25.293978', 'step': 7862, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:22:25.326545', 'step': 7862, 'epoch': 2} {'type': 'loss', 'content': 0.13391990959644318, 'timestamp': '2025-10-01 04:22:25.328763', 'step': 7863, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:25.361167', 'step': 7863, 'epoch': 2} {'type': 'loss', 'content': 0.07382993400096893, 'timestamp': '2025-10-01 04:22:25.384812', 'step': 7864, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:25.421712', 'step': 7864, 'epoch': 2} {'type': 'loss', 'content': 0.17243406176567078, 'timestamp': '2025-10-01 04:22:25.424125', 'step': 7865, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:22:25.454211', 'step': 7865, 'epoch': 2} {'type': 'loss', 'content': 0.10614746809005737, 'timestamp': '2025-10-01 04:22:25.456592', 'step': 7866, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:25.486913', 'step': 7866, 'epoch': 2} {'type': 'loss', 'content': 0.07289005070924759, 'timestamp': '2025-10-01 04:22:25.490054', 'step': 7867, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:22:25.520507', 'step': 7867, 'epoch': 2} {'type': 'loss', 'content': 0.11040478199720383, 'timestamp': '2025-10-01 04:22:25.544384', 'step': 7868, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:22:25.575155', 'step': 7868, 'epoch': 2} {'type': 'loss', 'content': 0.12858697772026062, 'timestamp': '2025-10-01 04:22:25.577515', 'step': 7869, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:25.607861', 'step': 7869, 'epoch': 2} {'type': 'loss', 'content': 0.12893211841583252, 'timestamp': '2025-10-01 04:22:25.609780', 'step': 7870, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:25.640060', 'step': 7870, 'epoch': 2} {'type': 'loss', 'content': 0.19438080489635468, 'timestamp': '2025-10-01 04:22:25.650372', 'step': 7871, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:22:25.685807', 'step': 7871, 'epoch': 2} {'type': 'loss', 'content': 0.15406517684459686, 'timestamp': '2025-10-01 04:22:25.710572', 'step': 7872, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:25.741516', 'step': 7872, 'epoch': 2} {'type': 'loss', 'content': 0.12034793198108673, 'timestamp': '2025-10-01 04:22:25.743784', 'step': 7873, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:25.782970', 'step': 7873, 'epoch': 2} {'type': 'loss', 'content': 0.08845862001180649, 'timestamp': '2025-10-01 04:22:25.785032', 'step': 7874, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:25.815796', 'step': 7874, 'epoch': 2} {'type': 'loss', 'content': 0.08558708429336548, 'timestamp': '2025-10-01 04:22:25.827781', 'step': 7875, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:25.858614', 'step': 7875, 'epoch': 2} {'type': 'loss', 'content': 0.08221922069787979, 'timestamp': '2025-10-01 04:22:25.882238', 'step': 7876, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:25.914685', 'step': 7876, 'epoch': 2} {'type': 'loss', 'content': 0.07982008904218674, 'timestamp': '2025-10-01 04:22:25.916711', 'step': 7877, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:25.947686', 'step': 7877, 'epoch': 2} {'type': 'loss', 'content': 0.08743134140968323, 'timestamp': '2025-10-01 04:22:25.949725', 'step': 7878, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:22:25.980479', 'step': 7878, 'epoch': 2} {'type': 'loss', 'content': 0.07576721161603928, 'timestamp': '2025-10-01 04:22:25.982825', 'step': 7879, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:26.013362', 'step': 7879, 'epoch': 2} {'type': 'loss', 'content': 0.11687779426574707, 'timestamp': '2025-10-01 04:22:26.036877', 'step': 7880, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:26.066744', 'step': 7880, 'epoch': 2} {'type': 'loss', 'content': 0.08838482946157455, 'timestamp': '2025-10-01 04:22:26.068939', 'step': 7881, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:26.100582', 'step': 7881, 'epoch': 2} {'type': 'loss', 'content': 0.17276571691036224, 'timestamp': '2025-10-01 04:22:26.102800', 'step': 7882, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:26.133562', 'step': 7882, 'epoch': 2} {'type': 'loss', 'content': 0.11726361513137817, 'timestamp': '2025-10-01 04:22:26.135964', 'step': 7883, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:26.167242', 'step': 7883, 'epoch': 2} {'type': 'loss', 'content': 0.12495876103639603, 'timestamp': '2025-10-01 04:22:26.190782', 'step': 7884, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:26.221540', 'step': 7884, 'epoch': 2} {'type': 'loss', 'content': 0.07436452805995941, 'timestamp': '2025-10-01 04:22:26.223815', 'step': 7885, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:26.254685', 'step': 7885, 'epoch': 2} {'type': 'loss', 'content': 0.12687350809574127, 'timestamp': '2025-10-01 04:22:26.256786', 'step': 7886, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:26.286615', 'step': 7886, 'epoch': 2} {'type': 'loss', 'content': 0.20716477930545807, 'timestamp': '2025-10-01 04:22:26.288730', 'step': 7887, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:22:26.319294', 'step': 7887, 'epoch': 2} {'type': 'loss', 'content': 0.19367027282714844, 'timestamp': '2025-10-01 04:22:26.342910', 'step': 7888, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:26.374165', 'step': 7888, 'epoch': 2} {'type': 'loss', 'content': 0.10240964591503143, 'timestamp': '2025-10-01 04:22:26.376228', 'step': 7889, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:26.407446', 'step': 7889, 'epoch': 2} {'type': 'loss', 'content': 0.026239274069666862, 'timestamp': '2025-10-01 04:22:26.409776', 'step': 7890, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:22:26.440691', 'step': 7890, 'epoch': 2} {'type': 'loss', 'content': 0.11315663903951645, 'timestamp': '2025-10-01 04:22:26.443573', 'step': 7891, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:26.480376', 'step': 7891, 'epoch': 2} {'type': 'loss', 'content': 0.1807212084531784, 'timestamp': '2025-10-01 04:22:26.504004', 'step': 7892, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:26.534418', 'step': 7892, 'epoch': 2} {'type': 'loss', 'content': 0.09255552291870117, 'timestamp': '2025-10-01 04:22:26.536927', 'step': 7893, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:26.567346', 'step': 7893, 'epoch': 2} {'type': 'loss', 'content': 0.06448215991258621, 'timestamp': '2025-10-01 04:22:26.569496', 'step': 7894, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:26.600675', 'step': 7894, 'epoch': 2} {'type': 'loss', 'content': 0.17720983922481537, 'timestamp': '2025-10-01 04:22:26.602804', 'step': 7895, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:26.632518', 'step': 7895, 'epoch': 2} {'type': 'loss', 'content': 0.10791894048452377, 'timestamp': '2025-10-01 04:22:26.656243', 'step': 7896, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:26.691865', 'step': 7896, 'epoch': 2} {'type': 'loss', 'content': 0.17026452720165253, 'timestamp': '2025-10-01 04:22:26.694155', 'step': 7897, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:26.724153', 'step': 7897, 'epoch': 2} {'type': 'loss', 'content': 0.14696051180362701, 'timestamp': '2025-10-01 04:22:26.726745', 'step': 7898, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:22:26.756799', 'step': 7898, 'epoch': 2} {'type': 'loss', 'content': 0.07252001017332077, 'timestamp': '2025-10-01 04:22:26.761908', 'step': 7899, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:26.792584', 'step': 7899, 'epoch': 2} {'type': 'loss', 'content': 0.18269628286361694, 'timestamp': '2025-10-01 04:22:26.816272', 'step': 7900, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:26.845892', 'step': 7900, 'epoch': 2} {'type': 'loss', 'content': 0.11265169084072113, 'timestamp': '2025-10-01 04:22:26.848454', 'step': 7901, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:26.880195', 'step': 7901, 'epoch': 2} {'type': 'loss', 'content': 0.08513867110013962, 'timestamp': '2025-10-01 04:22:26.883397', 'step': 7902, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:26.914085', 'step': 7902, 'epoch': 2} {'type': 'loss', 'content': 0.13557696342468262, 'timestamp': '2025-10-01 04:22:26.916237', 'step': 7903, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:26.947720', 'step': 7903, 'epoch': 2} {'type': 'loss', 'content': 0.17637255787849426, 'timestamp': '2025-10-01 04:22:26.971262', 'step': 7904, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:27.002049', 'step': 7904, 'epoch': 2} {'type': 'loss', 'content': 0.16285450756549835, 'timestamp': '2025-10-01 04:22:27.004442', 'step': 7905, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:27.034140', 'step': 7905, 'epoch': 2} {'type': 'loss', 'content': 0.13866408169269562, 'timestamp': '2025-10-01 04:22:27.036753', 'step': 7906, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:22:27.066791', 'step': 7906, 'epoch': 2} {'type': 'loss', 'content': 0.1821347028017044, 'timestamp': '2025-10-01 04:22:27.069749', 'step': 7907, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:22:27.102905', 'step': 7907, 'epoch': 2} {'type': 'loss', 'content': 0.12126648426055908, 'timestamp': '2025-10-01 04:22:27.126699', 'step': 7908, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:27.156629', 'step': 7908, 'epoch': 2} {'type': 'loss', 'content': 0.14408941566944122, 'timestamp': '2025-10-01 04:22:27.158754', 'step': 7909, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:27.189693', 'step': 7909, 'epoch': 2} {'type': 'loss', 'content': 0.103508859872818, 'timestamp': '2025-10-01 04:22:27.191923', 'step': 7910, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:27.222546', 'step': 7910, 'epoch': 2} {'type': 'loss', 'content': 0.113536536693573, 'timestamp': '2025-10-01 04:22:27.224774', 'step': 7911, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:22:27.255099', 'step': 7911, 'epoch': 2} {'type': 'loss', 'content': 0.10654813051223755, 'timestamp': '2025-10-01 04:22:27.278996', 'step': 7912, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:27.312536', 'step': 7912, 'epoch': 2} {'type': 'loss', 'content': 0.15609388053417206, 'timestamp': '2025-10-01 04:22:27.314667', 'step': 7913, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:22:27.345382', 'step': 7913, 'epoch': 2} {'type': 'loss', 'content': 0.15924522280693054, 'timestamp': '2025-10-01 04:22:27.348350', 'step': 7914, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:22:27.378977', 'step': 7914, 'epoch': 2} {'type': 'loss', 'content': 0.13804498314857483, 'timestamp': '2025-10-01 04:22:27.381347', 'step': 7915, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:27.412048', 'step': 7915, 'epoch': 2} {'type': 'loss', 'content': 0.0860375463962555, 'timestamp': '2025-10-01 04:22:27.436112', 'step': 7916, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:22:27.467363', 'step': 7916, 'epoch': 2} {'type': 'loss', 'content': 0.15424315631389618, 'timestamp': '2025-10-01 04:22:27.469642', 'step': 7917, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:27.500374', 'step': 7917, 'epoch': 2} {'type': 'loss', 'content': 0.09748342633247375, 'timestamp': '2025-10-01 04:22:27.502702', 'step': 7918, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:27.535871', 'step': 7918, 'epoch': 2} {'type': 'loss', 'content': 0.11918516457080841, 'timestamp': '2025-10-01 04:22:27.539316', 'step': 7919, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:27.576941', 'step': 7919, 'epoch': 2} {'type': 'loss', 'content': 0.1803843379020691, 'timestamp': '2025-10-01 04:22:27.600772', 'step': 7920, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:27.631800', 'step': 7920, 'epoch': 2} {'type': 'loss', 'content': 0.11036447435617447, 'timestamp': '2025-10-01 04:22:27.636940', 'step': 7921, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:27.667742', 'step': 7921, 'epoch': 2} {'type': 'loss', 'content': 0.15278035402297974, 'timestamp': '2025-10-01 04:22:27.669977', 'step': 7922, 'epoch': 2} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 949202279808}], 'timestamp': '2025-10-01 04:22:37.017743', 'step': 7922, 'epoch': 2} {'type': 'pplx', 'content': 11493.059007730446, 'timestamp': '2025-10-01 04:22:37.020632', 'step': 7922, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:37.052335', 'step': 7922, 'epoch': 2} {'type': 'loss', 'content': 0.05635300651192665, 'timestamp': '2025-10-01 04:22:37.054545', 'step': 7923, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:37.097742', 'step': 7923, 'epoch': 2} {'type': 'loss', 'content': 0.14475081861019135, 'timestamp': '2025-10-01 04:22:37.122162', 'step': 7924, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:22:37.155942', 'step': 7924, 'epoch': 2} {'type': 'loss', 'content': 0.09785891324281693, 'timestamp': '2025-10-01 04:22:37.158028', 'step': 7925, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:37.193089', 'step': 7925, 'epoch': 2} {'type': 'loss', 'content': 0.08515600860118866, 'timestamp': '2025-10-01 04:22:37.195647', 'step': 7926, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:37.230094', 'step': 7926, 'epoch': 2} {'type': 'loss', 'content': 0.1068442314863205, 'timestamp': '2025-10-01 04:22:37.232293', 'step': 7927, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:37.269115', 'step': 7927, 'epoch': 2} {'type': 'loss', 'content': 0.13197386264801025, 'timestamp': '2025-10-01 04:22:37.293613', 'step': 7928, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:37.335637', 'step': 7928, 'epoch': 2} {'type': 'loss', 'content': 0.15275417268276215, 'timestamp': '2025-10-01 04:22:37.346621', 'step': 7929, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:37.381798', 'step': 7929, 'epoch': 2} {'type': 'loss', 'content': 0.06885349750518799, 'timestamp': '2025-10-01 04:22:37.383996', 'step': 7930, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:37.431804', 'step': 7930, 'epoch': 2} {'type': 'loss', 'content': 0.19118256866931915, 'timestamp': '2025-10-01 04:22:37.434008', 'step': 7931, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:37.469977', 'step': 7931, 'epoch': 2} {'type': 'loss', 'content': 0.1308109611272812, 'timestamp': '2025-10-01 04:22:37.493585', 'step': 7932, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:37.533537', 'step': 7932, 'epoch': 2} {'type': 'loss', 'content': 0.09215939790010452, 'timestamp': '2025-10-01 04:22:37.535593', 'step': 7933, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:37.580577', 'step': 7933, 'epoch': 2} {'type': 'loss', 'content': 0.05148734524846077, 'timestamp': '2025-10-01 04:22:37.582748', 'step': 7934, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:37.616497', 'step': 7934, 'epoch': 2} {'type': 'loss', 'content': 0.18350084125995636, 'timestamp': '2025-10-01 04:22:37.618734', 'step': 7935, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:37.649991', 'step': 7935, 'epoch': 2} {'type': 'loss', 'content': 0.04745705798268318, 'timestamp': '2025-10-01 04:22:37.673700', 'step': 7936, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:37.712069', 'step': 7936, 'epoch': 2} {'type': 'loss', 'content': 0.13670922815799713, 'timestamp': '2025-10-01 04:22:37.714111', 'step': 7937, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:37.750343', 'step': 7937, 'epoch': 2} {'type': 'loss', 'content': 0.09485842287540436, 'timestamp': '2025-10-01 04:22:37.752575', 'step': 7938, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:22:37.788996', 'step': 7938, 'epoch': 2} {'type': 'loss', 'content': 0.11705143749713898, 'timestamp': '2025-10-01 04:22:37.791829', 'step': 7939, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:37.823204', 'step': 7939, 'epoch': 2} {'type': 'loss', 'content': 0.2586480975151062, 'timestamp': '2025-10-01 04:22:37.846877', 'step': 7940, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:22:37.886952', 'step': 7940, 'epoch': 2} {'type': 'loss', 'content': 0.14029167592525482, 'timestamp': '2025-10-01 04:22:37.890081', 'step': 7941, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:37.921368', 'step': 7941, 'epoch': 2} {'type': 'loss', 'content': 0.1297316551208496, 'timestamp': '2025-10-01 04:22:37.923542', 'step': 7942, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:22:37.961585', 'step': 7942, 'epoch': 2} {'type': 'loss', 'content': 0.08092205226421356, 'timestamp': '2025-10-01 04:22:37.963685', 'step': 7943, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:37.995550', 'step': 7943, 'epoch': 2} {'type': 'loss', 'content': 0.16574430465698242, 'timestamp': '2025-10-01 04:22:38.019153', 'step': 7944, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:38.051097', 'step': 7944, 'epoch': 2} {'type': 'loss', 'content': 0.07633600383996964, 'timestamp': '2025-10-01 04:22:38.053015', 'step': 7945, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:38.085550', 'step': 7945, 'epoch': 2} {'type': 'loss', 'content': 0.0800006091594696, 'timestamp': '2025-10-01 04:22:38.087752', 'step': 7946, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:22:38.119750', 'step': 7946, 'epoch': 2} {'type': 'loss', 'content': 0.038013312965631485, 'timestamp': '2025-10-01 04:22:38.122110', 'step': 7947, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:38.156406', 'step': 7947, 'epoch': 2} {'type': 'loss', 'content': 0.16930291056632996, 'timestamp': '2025-10-01 04:22:38.179938', 'step': 7948, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:38.212230', 'step': 7948, 'epoch': 2} {'type': 'loss', 'content': 0.14437733590602875, 'timestamp': '2025-10-01 04:22:38.214378', 'step': 7949, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:38.247288', 'step': 7949, 'epoch': 2} {'type': 'loss', 'content': 0.15994592010974884, 'timestamp': '2025-10-01 04:22:38.250046', 'step': 7950, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:38.282449', 'step': 7950, 'epoch': 2} {'type': 'loss', 'content': 0.18100771307945251, 'timestamp': '2025-10-01 04:22:38.284553', 'step': 7951, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:38.315708', 'step': 7951, 'epoch': 2} {'type': 'loss', 'content': 0.09256017953157425, 'timestamp': '2025-10-01 04:22:38.339339', 'step': 7952, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:38.376855', 'step': 7952, 'epoch': 2} {'type': 'loss', 'content': 0.09226645529270172, 'timestamp': '2025-10-01 04:22:38.388292', 'step': 7953, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:38.419286', 'step': 7953, 'epoch': 2} {'type': 'loss', 'content': 0.1380404233932495, 'timestamp': '2025-10-01 04:22:38.421427', 'step': 7954, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:38.453330', 'step': 7954, 'epoch': 2} {'type': 'loss', 'content': 0.16614703834056854, 'timestamp': '2025-10-01 04:22:38.455816', 'step': 7955, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:22:38.492066', 'step': 7955, 'epoch': 2} {'type': 'loss', 'content': 0.19231978058815002, 'timestamp': '2025-10-01 04:22:38.515948', 'step': 7956, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:22:38.547922', 'step': 7956, 'epoch': 2} {'type': 'loss', 'content': 0.13173885643482208, 'timestamp': '2025-10-01 04:22:38.550525', 'step': 7957, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:38.584692', 'step': 7957, 'epoch': 2} {'type': 'loss', 'content': 0.11946763843297958, 'timestamp': '2025-10-01 04:22:38.589022', 'step': 7958, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:38.621416', 'step': 7958, 'epoch': 2} {'type': 'loss', 'content': 0.047086138278245926, 'timestamp': '2025-10-01 04:22:38.623644', 'step': 7959, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:38.656404', 'step': 7959, 'epoch': 2} {'type': 'loss', 'content': 0.05614385008811951, 'timestamp': '2025-10-01 04:22:38.679970', 'step': 7960, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:22:38.719132', 'step': 7960, 'epoch': 2} {'type': 'loss', 'content': 0.052560728043317795, 'timestamp': '2025-10-01 04:22:38.721328', 'step': 7961, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:38.759353', 'step': 7961, 'epoch': 2} {'type': 'loss', 'content': 0.15600839257240295, 'timestamp': '2025-10-01 04:22:38.761998', 'step': 7962, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:38.797895', 'step': 7962, 'epoch': 2} {'type': 'loss', 'content': 0.09420451521873474, 'timestamp': '2025-10-01 04:22:38.801028', 'step': 7963, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:38.835985', 'step': 7963, 'epoch': 2} {'type': 'loss', 'content': 0.06818152219057083, 'timestamp': '2025-10-01 04:22:38.866990', 'step': 7964, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:22:38.914834', 'step': 7964, 'epoch': 2} {'type': 'loss', 'content': 0.07454019039869308, 'timestamp': '2025-10-01 04:22:38.917125', 'step': 7965, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:38.951151', 'step': 7965, 'epoch': 2} {'type': 'loss', 'content': 0.13907143473625183, 'timestamp': '2025-10-01 04:22:38.953861', 'step': 7966, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:38.987127', 'step': 7966, 'epoch': 2} {'type': 'loss', 'content': 0.17565426230430603, 'timestamp': '2025-10-01 04:22:38.990018', 'step': 7967, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:39.022712', 'step': 7967, 'epoch': 2} {'type': 'loss', 'content': 0.07540572434663773, 'timestamp': '2025-10-01 04:22:39.046486', 'step': 7968, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:39.078214', 'step': 7968, 'epoch': 2} {'type': 'loss', 'content': 0.13128918409347534, 'timestamp': '2025-10-01 04:22:39.080583', 'step': 7969, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:39.119624', 'step': 7969, 'epoch': 2} {'type': 'loss', 'content': 0.07813703268766403, 'timestamp': '2025-10-01 04:22:39.121643', 'step': 7970, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:22:39.158035', 'step': 7970, 'epoch': 2} {'type': 'loss', 'content': 0.07014387845993042, 'timestamp': '2025-10-01 04:22:39.160278', 'step': 7971, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:39.201628', 'step': 7971, 'epoch': 2} {'type': 'loss', 'content': 0.08827804028987885, 'timestamp': '2025-10-01 04:22:39.225169', 'step': 7972, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:39.258046', 'step': 7972, 'epoch': 2} {'type': 'loss', 'content': 0.20455144345760345, 'timestamp': '2025-10-01 04:22:39.260358', 'step': 7973, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:39.292990', 'step': 7973, 'epoch': 2} {'type': 'loss', 'content': 0.12818513810634613, 'timestamp': '2025-10-01 04:22:39.295334', 'step': 7974, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:39.327100', 'step': 7974, 'epoch': 2} {'type': 'loss', 'content': 0.11852439492940903, 'timestamp': '2025-10-01 04:22:39.329653', 'step': 7975, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:39.362945', 'step': 7975, 'epoch': 2} {'type': 'loss', 'content': 0.11879099905490875, 'timestamp': '2025-10-01 04:22:39.387116', 'step': 7976, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:39.421113', 'step': 7976, 'epoch': 2} {'type': 'loss', 'content': 0.14209315180778503, 'timestamp': '2025-10-01 04:22:39.423653', 'step': 7977, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:39.456991', 'step': 7977, 'epoch': 2} {'type': 'loss', 'content': 0.08446703106164932, 'timestamp': '2025-10-01 04:22:39.459777', 'step': 7978, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:39.493043', 'step': 7978, 'epoch': 2} {'type': 'loss', 'content': 0.14703457057476044, 'timestamp': '2025-10-01 04:22:39.496032', 'step': 7979, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:22:39.527999', 'step': 7979, 'epoch': 2} {'type': 'loss', 'content': 0.08057865500450134, 'timestamp': '2025-10-01 04:22:39.552113', 'step': 7980, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:22:39.583130', 'step': 7980, 'epoch': 2} {'type': 'loss', 'content': 0.09644883871078491, 'timestamp': '2025-10-01 04:22:39.585388', 'step': 7981, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:39.617865', 'step': 7981, 'epoch': 2} {'type': 'loss', 'content': 0.23951971530914307, 'timestamp': '2025-10-01 04:22:39.619847', 'step': 7982, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:39.656901', 'step': 7982, 'epoch': 2} {'type': 'loss', 'content': 0.11010971665382385, 'timestamp': '2025-10-01 04:22:39.659175', 'step': 7983, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:39.692228', 'step': 7983, 'epoch': 2} {'type': 'loss', 'content': 0.05284995958209038, 'timestamp': '2025-10-01 04:22:39.715943', 'step': 7984, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:39.747228', 'step': 7984, 'epoch': 2} {'type': 'loss', 'content': 0.06074332818388939, 'timestamp': '2025-10-01 04:22:39.749287', 'step': 7985, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:22:39.792047', 'step': 7985, 'epoch': 2} {'type': 'loss', 'content': 0.16711096465587616, 'timestamp': '2025-10-01 04:22:39.794871', 'step': 7986, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:39.827132', 'step': 7986, 'epoch': 2} {'type': 'loss', 'content': 0.13450120389461517, 'timestamp': '2025-10-01 04:22:39.830201', 'step': 7987, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:22:39.863051', 'step': 7987, 'epoch': 2} {'type': 'loss', 'content': 0.19225966930389404, 'timestamp': '2025-10-01 04:22:39.888465', 'step': 7988, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:39.920712', 'step': 7988, 'epoch': 2} {'type': 'loss', 'content': 0.13023844361305237, 'timestamp': '2025-10-01 04:22:39.924167', 'step': 7989, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:39.956783', 'step': 7989, 'epoch': 2} {'type': 'loss', 'content': 0.14432622492313385, 'timestamp': '2025-10-01 04:22:39.959196', 'step': 7990, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:39.998962', 'step': 7990, 'epoch': 2} {'type': 'loss', 'content': 0.09977211058139801, 'timestamp': '2025-10-01 04:22:40.001048', 'step': 7991, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:40.032809', 'step': 7991, 'epoch': 2} {'type': 'loss', 'content': 0.05708356201648712, 'timestamp': '2025-10-01 04:22:40.056703', 'step': 7992, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-10-01 04:22:40.088967', 'step': 7992, 'epoch': 2} {'type': 'loss', 'content': 0.11681938171386719, 'timestamp': '2025-10-01 04:22:40.091318', 'step': 7993, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:22:40.123886', 'step': 7993, 'epoch': 2} {'type': 'loss', 'content': 0.09664329141378403, 'timestamp': '2025-10-01 04:22:40.128288', 'step': 7994, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:40.166131', 'step': 7994, 'epoch': 2} {'type': 'loss', 'content': 0.2098274677991867, 'timestamp': '2025-10-01 04:22:40.168061', 'step': 7995, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:22:40.203984', 'step': 7995, 'epoch': 2} {'type': 'loss', 'content': 0.1801932007074356, 'timestamp': '2025-10-01 04:22:40.227412', 'step': 7996, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:40.260564', 'step': 7996, 'epoch': 2} {'type': 'loss', 'content': 0.08267660439014435, 'timestamp': '2025-10-01 04:22:40.262355', 'step': 7997, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:40.294697', 'step': 7997, 'epoch': 2} {'type': 'loss', 'content': 0.0875084176659584, 'timestamp': '2025-10-01 04:22:40.297607', 'step': 7998, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:40.329509', 'step': 7998, 'epoch': 2} {'type': 'loss', 'content': 0.21666882932186127, 'timestamp': '2025-10-01 04:22:40.331645', 'step': 7999, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:40.365132', 'step': 7999, 'epoch': 2} {'type': 'loss', 'content': 0.13652871549129486, 'timestamp': '2025-10-01 04:22:40.388734', 'step': 8000, 'epoch': 2} {'type': 'info', 'content': 'Checkpoint saved at step 8000', 'timestamp': '2025-10-01 04:22:45.885502', 'step': 8000, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:45.927699', 'step': 8000, 'epoch': 2} {'type': 'loss', 'content': 0.14341682195663452, 'timestamp': '2025-10-01 04:22:45.929756', 'step': 8001, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:45.962654', 'step': 8001, 'epoch': 2} {'type': 'loss', 'content': 0.1192309707403183, 'timestamp': '2025-10-01 04:22:45.965273', 'step': 8002, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:46.000014', 'step': 8002, 'epoch': 2} {'type': 'loss', 'content': 0.09185382723808289, 'timestamp': '2025-10-01 04:22:46.004894', 'step': 8003, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:46.045826', 'step': 8003, 'epoch': 2} {'type': 'loss', 'content': 0.07134635001420975, 'timestamp': '2025-10-01 04:22:46.069936', 'step': 8004, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:46.107393', 'step': 8004, 'epoch': 2} {'type': 'loss', 'content': 0.12078943848609924, 'timestamp': '2025-10-01 04:22:46.109875', 'step': 8005, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:46.141356', 'step': 8005, 'epoch': 2} {'type': 'loss', 'content': 0.1437750607728958, 'timestamp': '2025-10-01 04:22:46.144223', 'step': 8006, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:46.179169', 'step': 8006, 'epoch': 2} {'type': 'loss', 'content': 0.11218329519033432, 'timestamp': '2025-10-01 04:22:46.181636', 'step': 8007, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:22:46.219419', 'step': 8007, 'epoch': 2} {'type': 'loss', 'content': 0.15595491230487823, 'timestamp': '2025-10-01 04:22:46.243087', 'step': 8008, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:46.275191', 'step': 8008, 'epoch': 2} {'type': 'loss', 'content': 0.13269969820976257, 'timestamp': '2025-10-01 04:22:46.277332', 'step': 8009, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:46.314628', 'step': 8009, 'epoch': 2} {'type': 'loss', 'content': 0.07182919234037399, 'timestamp': '2025-10-01 04:22:46.317283', 'step': 8010, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:46.348063', 'step': 8010, 'epoch': 2} {'type': 'loss', 'content': 0.07839025557041168, 'timestamp': '2025-10-01 04:22:46.350281', 'step': 8011, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:46.383185', 'step': 8011, 'epoch': 2} {'type': 'loss', 'content': 0.17532244324684143, 'timestamp': '2025-10-01 04:22:46.412343', 'step': 8012, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:46.453558', 'step': 8012, 'epoch': 2} {'type': 'loss', 'content': 0.13893869519233704, 'timestamp': '2025-10-01 04:22:46.455687', 'step': 8013, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:46.486435', 'step': 8013, 'epoch': 2} {'type': 'loss', 'content': 0.11498311161994934, 'timestamp': '2025-10-01 04:22:46.488569', 'step': 8014, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:46.522683', 'step': 8014, 'epoch': 2} {'type': 'loss', 'content': 0.10549288243055344, 'timestamp': '2025-10-01 04:22:46.524888', 'step': 8015, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:46.558181', 'step': 8015, 'epoch': 2} {'type': 'loss', 'content': 0.10887650400400162, 'timestamp': '2025-10-01 04:22:46.581770', 'step': 8016, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:46.614876', 'step': 8016, 'epoch': 2} {'type': 'loss', 'content': 0.09217359870672226, 'timestamp': '2025-10-01 04:22:46.616955', 'step': 8017, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:46.647525', 'step': 8017, 'epoch': 2} {'type': 'loss', 'content': 0.058387551456689835, 'timestamp': '2025-10-01 04:22:46.649860', 'step': 8018, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:46.681061', 'step': 8018, 'epoch': 2} {'type': 'loss', 'content': 0.09451455622911453, 'timestamp': '2025-10-01 04:22:46.683332', 'step': 8019, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:46.721180', 'step': 8019, 'epoch': 2} {'type': 'loss', 'content': 0.12973268330097198, 'timestamp': '2025-10-01 04:22:46.744793', 'step': 8020, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:46.781506', 'step': 8020, 'epoch': 2} {'type': 'loss', 'content': 0.17947639524936676, 'timestamp': '2025-10-01 04:22:46.783775', 'step': 8021, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:46.823460', 'step': 8021, 'epoch': 2} {'type': 'loss', 'content': 0.05925477668642998, 'timestamp': '2025-10-01 04:22:46.825746', 'step': 8022, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:22:46.856878', 'step': 8022, 'epoch': 2} {'type': 'loss', 'content': 0.10687827318906784, 'timestamp': '2025-10-01 04:22:46.859442', 'step': 8023, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:46.896500', 'step': 8023, 'epoch': 2} {'type': 'loss', 'content': 0.2067391574382782, 'timestamp': '2025-10-01 04:22:46.920025', 'step': 8024, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-10-01 04:22:46.954608', 'step': 8024, 'epoch': 2} {'type': 'loss', 'content': 0.16800819337368011, 'timestamp': '2025-10-01 04:22:46.957023', 'step': 8025, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:46.993847', 'step': 8025, 'epoch': 2} {'type': 'loss', 'content': 0.13525885343551636, 'timestamp': '2025-10-01 04:22:46.995935', 'step': 8026, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:47.026858', 'step': 8026, 'epoch': 2} {'type': 'loss', 'content': 0.18374168872833252, 'timestamp': '2025-10-01 04:22:47.028931', 'step': 8027, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:47.065528', 'step': 8027, 'epoch': 2} {'type': 'loss', 'content': 0.08830118924379349, 'timestamp': '2025-10-01 04:22:47.089015', 'step': 8028, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:47.121324', 'step': 8028, 'epoch': 2} {'type': 'loss', 'content': 0.1278807818889618, 'timestamp': '2025-10-01 04:22:47.123549', 'step': 8029, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:47.155196', 'step': 8029, 'epoch': 2} {'type': 'loss', 'content': 0.13144777715206146, 'timestamp': '2025-10-01 04:22:47.157455', 'step': 8030, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:47.198046', 'step': 8030, 'epoch': 2} {'type': 'loss', 'content': 0.1321662813425064, 'timestamp': '2025-10-01 04:22:47.200105', 'step': 8031, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:47.232018', 'step': 8031, 'epoch': 2} {'type': 'loss', 'content': 0.07152079045772552, 'timestamp': '2025-10-01 04:22:47.255708', 'step': 8032, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:47.287321', 'step': 8032, 'epoch': 2} {'type': 'loss', 'content': 0.06466148048639297, 'timestamp': '2025-10-01 04:22:47.289498', 'step': 8033, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:22:47.321507', 'step': 8033, 'epoch': 2} {'type': 'loss', 'content': 0.28099414706230164, 'timestamp': '2025-10-01 04:22:47.324004', 'step': 8034, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:47.356161', 'step': 8034, 'epoch': 2} {'type': 'loss', 'content': 0.06718429177999496, 'timestamp': '2025-10-01 04:22:47.358384', 'step': 8035, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:47.390410', 'step': 8035, 'epoch': 2} {'type': 'loss', 'content': 0.09894049167633057, 'timestamp': '2025-10-01 04:22:47.414359', 'step': 8036, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:47.448842', 'step': 8036, 'epoch': 2} {'type': 'loss', 'content': 0.06478269398212433, 'timestamp': '2025-10-01 04:22:47.451136', 'step': 8037, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:47.484064', 'step': 8037, 'epoch': 2} {'type': 'loss', 'content': 0.21650230884552002, 'timestamp': '2025-10-01 04:22:47.497805', 'step': 8038, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:47.529253', 'step': 8038, 'epoch': 2} {'type': 'loss', 'content': 0.19356738030910492, 'timestamp': '2025-10-01 04:22:47.531407', 'step': 8039, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:22:47.565917', 'step': 8039, 'epoch': 2} {'type': 'loss', 'content': 0.14449629187583923, 'timestamp': '2025-10-01 04:22:47.589706', 'step': 8040, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:47.621496', 'step': 8040, 'epoch': 2} {'type': 'loss', 'content': 0.08770681917667389, 'timestamp': '2025-10-01 04:22:47.624884', 'step': 8041, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:47.657702', 'step': 8041, 'epoch': 2} {'type': 'loss', 'content': 0.0785922110080719, 'timestamp': '2025-10-01 04:22:47.659988', 'step': 8042, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:47.690961', 'step': 8042, 'epoch': 2} {'type': 'loss', 'content': 0.08741103112697601, 'timestamp': '2025-10-01 04:22:47.703816', 'step': 8043, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:47.741324', 'step': 8043, 'epoch': 2} {'type': 'loss', 'content': 0.18607959151268005, 'timestamp': '2025-10-01 04:22:47.764945', 'step': 8044, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:22:47.798296', 'step': 8044, 'epoch': 2} {'type': 'loss', 'content': 0.1585221290588379, 'timestamp': '2025-10-01 04:22:47.800399', 'step': 8045, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-10-01 04:22:47.839417', 'step': 8045, 'epoch': 2} {'type': 'loss', 'content': 0.09875889122486115, 'timestamp': '2025-10-01 04:22:47.846576', 'step': 8046, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:47.877532', 'step': 8046, 'epoch': 2} {'type': 'loss', 'content': 0.11680043488740921, 'timestamp': '2025-10-01 04:22:47.879674', 'step': 8047, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:47.923714', 'step': 8047, 'epoch': 2} {'type': 'loss', 'content': 0.1261274367570877, 'timestamp': '2025-10-01 04:22:47.947327', 'step': 8048, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:47.979927', 'step': 8048, 'epoch': 2} {'type': 'loss', 'content': 0.20756307244300842, 'timestamp': '2025-10-01 04:22:47.982199', 'step': 8049, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:48.015835', 'step': 8049, 'epoch': 2} {'type': 'loss', 'content': 0.1134733259677887, 'timestamp': '2025-10-01 04:22:48.018235', 'step': 8050, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:22:48.053553', 'step': 8050, 'epoch': 2} {'type': 'loss', 'content': 0.12553460896015167, 'timestamp': '2025-10-01 04:22:48.056541', 'step': 8051, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-10-01 04:22:48.092323', 'step': 8051, 'epoch': 2} {'type': 'loss', 'content': 0.12301105260848999, 'timestamp': '2025-10-01 04:22:48.120558', 'step': 8052, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:48.158151', 'step': 8052, 'epoch': 2} {'type': 'loss', 'content': 0.1393713802099228, 'timestamp': '2025-10-01 04:22:48.160397', 'step': 8053, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:48.194931', 'step': 8053, 'epoch': 2} {'type': 'loss', 'content': 0.10619591921567917, 'timestamp': '2025-10-01 04:22:48.198025', 'step': 8054, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:48.230969', 'step': 8054, 'epoch': 2} {'type': 'loss', 'content': 0.12394954264163971, 'timestamp': '2025-10-01 04:22:48.233135', 'step': 8055, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:48.264462', 'step': 8055, 'epoch': 2} {'type': 'loss', 'content': 0.24614469707012177, 'timestamp': '2025-10-01 04:22:48.288109', 'step': 8056, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:22:48.319783', 'step': 8056, 'epoch': 2} {'type': 'loss', 'content': 0.12346315383911133, 'timestamp': '2025-10-01 04:22:48.321957', 'step': 8057, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:48.356983', 'step': 8057, 'epoch': 2} {'type': 'loss', 'content': 0.14451029896736145, 'timestamp': '2025-10-01 04:22:48.359190', 'step': 8058, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:48.395176', 'step': 8058, 'epoch': 2} {'type': 'loss', 'content': 0.123556949198246, 'timestamp': '2025-10-01 04:22:48.397289', 'step': 8059, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:22:48.429097', 'step': 8059, 'epoch': 2} {'type': 'loss', 'content': 0.13050585985183716, 'timestamp': '2025-10-01 04:22:48.452772', 'step': 8060, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:48.484306', 'step': 8060, 'epoch': 2} {'type': 'loss', 'content': 0.08603087812662125, 'timestamp': '2025-10-01 04:22:48.486521', 'step': 8061, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:22:48.518813', 'step': 8061, 'epoch': 2} {'type': 'loss', 'content': 0.23270127177238464, 'timestamp': '2025-10-01 04:22:48.521424', 'step': 8062, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:48.553163', 'step': 8062, 'epoch': 2} {'type': 'loss', 'content': 0.13813886046409607, 'timestamp': '2025-10-01 04:22:48.555428', 'step': 8063, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:48.591920', 'step': 8063, 'epoch': 2} {'type': 'loss', 'content': 0.14621421694755554, 'timestamp': '2025-10-01 04:22:48.620595', 'step': 8064, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:22:48.659099', 'step': 8064, 'epoch': 2} {'type': 'loss', 'content': 0.1361960768699646, 'timestamp': '2025-10-01 04:22:48.661399', 'step': 8065, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:48.701397', 'step': 8065, 'epoch': 2} {'type': 'loss', 'content': 0.08832357823848724, 'timestamp': '2025-10-01 04:22:48.703605', 'step': 8066, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:48.737695', 'step': 8066, 'epoch': 2} {'type': 'loss', 'content': 0.06237412616610527, 'timestamp': '2025-10-01 04:22:48.739860', 'step': 8067, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:48.776685', 'step': 8067, 'epoch': 2} {'type': 'loss', 'content': 0.1663280427455902, 'timestamp': '2025-10-01 04:22:48.800287', 'step': 8068, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:48.831650', 'step': 8068, 'epoch': 2} {'type': 'loss', 'content': 0.03484794870018959, 'timestamp': '2025-10-01 04:22:48.833795', 'step': 8069, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:48.865982', 'step': 8069, 'epoch': 2} {'type': 'loss', 'content': 0.14364773035049438, 'timestamp': '2025-10-01 04:22:48.868194', 'step': 8070, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:48.901021', 'step': 8070, 'epoch': 2} {'type': 'loss', 'content': 0.16402924060821533, 'timestamp': '2025-10-01 04:22:48.903546', 'step': 8071, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:48.939968', 'step': 8071, 'epoch': 2} {'type': 'loss', 'content': 0.17439228296279907, 'timestamp': '2025-10-01 04:22:48.963402', 'step': 8072, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:49', 'step': 8072, 'epoch': 2} {'type': 'loss', 'content': 0.0823136419057846, 'timestamp': '2025-10-01 04:22:49.002228', 'step': 8073, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:49.033573', 'step': 8073, 'epoch': 2} {'type': 'loss', 'content': 0.09841625392436981, 'timestamp': '2025-10-01 04:22:49.035750', 'step': 8074, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:49.071367', 'step': 8074, 'epoch': 2} {'type': 'loss', 'content': 0.17756380140781403, 'timestamp': '2025-10-01 04:22:49.073584', 'step': 8075, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:49.108512', 'step': 8075, 'epoch': 2} {'type': 'loss', 'content': 0.14282797276973724, 'timestamp': '2025-10-01 04:22:49.132144', 'step': 8076, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:49.163630', 'step': 8076, 'epoch': 2} {'type': 'loss', 'content': 0.1523265689611435, 'timestamp': '2025-10-01 04:22:49.166074', 'step': 8077, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:49.204253', 'step': 8077, 'epoch': 2} {'type': 'loss', 'content': 0.18042400479316711, 'timestamp': '2025-10-01 04:22:49.206299', 'step': 8078, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:49.242627', 'step': 8078, 'epoch': 2} {'type': 'loss', 'content': 0.15678972005844116, 'timestamp': '2025-10-01 04:22:49.245043', 'step': 8079, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:22:49.276307', 'step': 8079, 'epoch': 2} {'type': 'loss', 'content': 0.17204992473125458, 'timestamp': '2025-10-01 04:22:49.300069', 'step': 8080, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:49.330877', 'step': 8080, 'epoch': 2} {'type': 'loss', 'content': 0.13705481588840485, 'timestamp': '2025-10-01 04:22:49.333051', 'step': 8081, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:22:49.367638', 'step': 8081, 'epoch': 2} {'type': 'loss', 'content': 0.17222866415977478, 'timestamp': '2025-10-01 04:22:49.370061', 'step': 8082, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:49.400302', 'step': 8082, 'epoch': 2} {'type': 'loss', 'content': 0.17456233501434326, 'timestamp': '2025-10-01 04:22:49.402431', 'step': 8083, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:49.434273', 'step': 8083, 'epoch': 2} {'type': 'loss', 'content': 0.14211049675941467, 'timestamp': '2025-10-01 04:22:49.457941', 'step': 8084, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:49.488184', 'step': 8084, 'epoch': 2} {'type': 'loss', 'content': 0.10211175680160522, 'timestamp': '2025-10-01 04:22:49.490347', 'step': 8085, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:49.521008', 'step': 8085, 'epoch': 2} {'type': 'loss', 'content': 0.20411714911460876, 'timestamp': '2025-10-01 04:22:49.523022', 'step': 8086, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:49.567522', 'step': 8086, 'epoch': 2} {'type': 'loss', 'content': 0.23234562575817108, 'timestamp': '2025-10-01 04:22:49.569609', 'step': 8087, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:22:49.603702', 'step': 8087, 'epoch': 2} {'type': 'loss', 'content': 0.10730869323015213, 'timestamp': '2025-10-01 04:22:49.627320', 'step': 8088, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:49.660035', 'step': 8088, 'epoch': 2} {'type': 'loss', 'content': 0.1594143658876419, 'timestamp': '2025-10-01 04:22:49.662089', 'step': 8089, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:49.696859', 'step': 8089, 'epoch': 2} {'type': 'loss', 'content': 0.06323819607496262, 'timestamp': '2025-10-01 04:22:49.699069', 'step': 8090, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:49.732889', 'step': 8090, 'epoch': 2} {'type': 'loss', 'content': 0.11093783378601074, 'timestamp': '2025-10-01 04:22:49.735072', 'step': 8091, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:49.765902', 'step': 8091, 'epoch': 2} {'type': 'loss', 'content': 0.11248026043176651, 'timestamp': '2025-10-01 04:22:49.789594', 'step': 8092, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:49.820093', 'step': 8092, 'epoch': 2} {'type': 'loss', 'content': 0.13587312400341034, 'timestamp': '2025-10-01 04:22:49.823686', 'step': 8093, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:49.855621', 'step': 8093, 'epoch': 2} {'type': 'loss', 'content': 0.14657895267009735, 'timestamp': '2025-10-01 04:22:49.857860', 'step': 8094, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:22:49.890846', 'step': 8094, 'epoch': 2} {'type': 'loss', 'content': 0.11010686308145523, 'timestamp': '2025-10-01 04:22:49.893324', 'step': 8095, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:49.932847', 'step': 8095, 'epoch': 2} {'type': 'loss', 'content': 0.11904726922512054, 'timestamp': '2025-10-01 04:22:49.956566', 'step': 8096, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:49.987582', 'step': 8096, 'epoch': 2} {'type': 'loss', 'content': 0.09506823122501373, 'timestamp': '2025-10-01 04:22:49.993976', 'step': 8097, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:50.025609', 'step': 8097, 'epoch': 2} {'type': 'loss', 'content': 0.12857182323932648, 'timestamp': '2025-10-01 04:22:50.027600', 'step': 8098, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:50.064744', 'step': 8098, 'epoch': 2} {'type': 'loss', 'content': 0.15984192490577698, 'timestamp': '2025-10-01 04:22:50.066873', 'step': 8099, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:22:50.097434', 'step': 8099, 'epoch': 2} {'type': 'loss', 'content': 0.09828443080186844, 'timestamp': '2025-10-01 04:22:50.120919', 'step': 8100, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:50.151822', 'step': 8100, 'epoch': 2} {'type': 'loss', 'content': 0.0856265053153038, 'timestamp': '2025-10-01 04:22:50.154045', 'step': 8101, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:50.188134', 'step': 8101, 'epoch': 2} {'type': 'loss', 'content': 0.0562749020755291, 'timestamp': '2025-10-01 04:22:50.190502', 'step': 8102, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:50.222573', 'step': 8102, 'epoch': 2} {'type': 'loss', 'content': 0.14125625789165497, 'timestamp': '2025-10-01 04:22:50.224760', 'step': 8103, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:50.261688', 'step': 8103, 'epoch': 2} {'type': 'loss', 'content': 0.12331719696521759, 'timestamp': '2025-10-01 04:22:50.285359', 'step': 8104, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:50.318424', 'step': 8104, 'epoch': 2} {'type': 'loss', 'content': 0.14184100925922394, 'timestamp': '2025-10-01 04:22:50.320878', 'step': 8105, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:50.356699', 'step': 8105, 'epoch': 2} {'type': 'loss', 'content': 0.10295210778713226, 'timestamp': '2025-10-01 04:22:50.358861', 'step': 8106, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:50.389176', 'step': 8106, 'epoch': 2} {'type': 'loss', 'content': 0.1732470989227295, 'timestamp': '2025-10-01 04:22:50.391405', 'step': 8107, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:50.422126', 'step': 8107, 'epoch': 2} {'type': 'loss', 'content': 0.1707427203655243, 'timestamp': '2025-10-01 04:22:50.445917', 'step': 8108, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:50.481500', 'step': 8108, 'epoch': 2} {'type': 'loss', 'content': 0.1631726175546646, 'timestamp': '2025-10-01 04:22:50.483807', 'step': 8109, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:50.521098', 'step': 8109, 'epoch': 2} {'type': 'loss', 'content': 0.09932073950767517, 'timestamp': '2025-10-01 04:22:50.523293', 'step': 8110, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:22:50.554099', 'step': 8110, 'epoch': 2} {'type': 'loss', 'content': 0.18222905695438385, 'timestamp': '2025-10-01 04:22:50.557008', 'step': 8111, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:50.587796', 'step': 8111, 'epoch': 2} {'type': 'loss', 'content': 0.05344482883810997, 'timestamp': '2025-10-01 04:22:50.611943', 'step': 8112, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:22:50.641898', 'step': 8112, 'epoch': 2} {'type': 'loss', 'content': 0.141214519739151, 'timestamp': '2025-10-01 04:22:50.644181', 'step': 8113, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:50.674347', 'step': 8113, 'epoch': 2} {'type': 'loss', 'content': 0.12037646025419235, 'timestamp': '2025-10-01 04:22:50.676556', 'step': 8114, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:22:50.708045', 'step': 8114, 'epoch': 2} {'type': 'loss', 'content': 0.17247717082500458, 'timestamp': '2025-10-01 04:22:50.710385', 'step': 8115, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:50.747582', 'step': 8115, 'epoch': 2} {'type': 'loss', 'content': 0.0879874899983406, 'timestamp': '2025-10-01 04:22:50.771503', 'step': 8116, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:50.802284', 'step': 8116, 'epoch': 2} {'type': 'loss', 'content': 0.10276564210653305, 'timestamp': '2025-10-01 04:22:50.805494', 'step': 8117, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:50.839199', 'step': 8117, 'epoch': 2} {'type': 'loss', 'content': 0.08966351300477982, 'timestamp': '2025-10-01 04:22:50.841309', 'step': 8118, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:50.875541', 'step': 8118, 'epoch': 2} {'type': 'loss', 'content': 0.10216961801052094, 'timestamp': '2025-10-01 04:22:50.878186', 'step': 8119, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:50.909110', 'step': 8119, 'epoch': 2} {'type': 'loss', 'content': 0.11551948636770248, 'timestamp': '2025-10-01 04:22:50.932795', 'step': 8120, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:50.963195', 'step': 8120, 'epoch': 2} {'type': 'loss', 'content': 0.1649918109178543, 'timestamp': '2025-10-01 04:22:50.965336', 'step': 8121, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:50.996244', 'step': 8121, 'epoch': 2} {'type': 'loss', 'content': 0.1471915990114212, 'timestamp': '2025-10-01 04:22:50.998306', 'step': 8122, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:51.030626', 'step': 8122, 'epoch': 2} {'type': 'loss', 'content': 0.20278364419937134, 'timestamp': '2025-10-01 04:22:51.032904', 'step': 8123, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:51.063594', 'step': 8123, 'epoch': 2} {'type': 'loss', 'content': 0.10872859507799149, 'timestamp': '2025-10-01 04:22:51.088008', 'step': 8124, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:51.119994', 'step': 8124, 'epoch': 2} {'type': 'loss', 'content': 0.11975729465484619, 'timestamp': '2025-10-01 04:22:51.122354', 'step': 8125, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:51.153179', 'step': 8125, 'epoch': 2} {'type': 'loss', 'content': 0.12044432014226913, 'timestamp': '2025-10-01 04:22:51.156102', 'step': 8126, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:22:51.187716', 'step': 8126, 'epoch': 2} {'type': 'loss', 'content': 0.1832924634218216, 'timestamp': '2025-10-01 04:22:51.190127', 'step': 8127, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:51.220758', 'step': 8127, 'epoch': 2} {'type': 'loss', 'content': 0.1365005373954773, 'timestamp': '2025-10-01 04:22:51.244425', 'step': 8128, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:51.275967', 'step': 8128, 'epoch': 2} {'type': 'loss', 'content': 0.23426289856433868, 'timestamp': '2025-10-01 04:22:51.278921', 'step': 8129, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:22:51.314847', 'step': 8129, 'epoch': 2} {'type': 'loss', 'content': 0.14483729004859924, 'timestamp': '2025-10-01 04:22:51.320366', 'step': 8130, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:51.357476', 'step': 8130, 'epoch': 2} {'type': 'loss', 'content': 0.11873384565114975, 'timestamp': '2025-10-01 04:22:51.359292', 'step': 8131, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:51.394413', 'step': 8131, 'epoch': 2} {'type': 'loss', 'content': 0.07400477677583694, 'timestamp': '2025-10-01 04:22:51.418117', 'step': 8132, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:51.447617', 'step': 8132, 'epoch': 2} {'type': 'loss', 'content': 0.13589218258857727, 'timestamp': '2025-10-01 04:22:51.449628', 'step': 8133, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:51.479919', 'step': 8133, 'epoch': 2} {'type': 'loss', 'content': 0.11827787011861801, 'timestamp': '2025-10-01 04:22:51.481928', 'step': 8134, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:51.511754', 'step': 8134, 'epoch': 2} {'type': 'loss', 'content': 0.17918796837329865, 'timestamp': '2025-10-01 04:22:51.513820', 'step': 8135, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:22:51.543476', 'step': 8135, 'epoch': 2} {'type': 'loss', 'content': 0.08950699120759964, 'timestamp': '2025-10-01 04:22:51.567139', 'step': 8136, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:51.597615', 'step': 8136, 'epoch': 2} {'type': 'loss', 'content': 0.11142796277999878, 'timestamp': '2025-10-01 04:22:51.599790', 'step': 8137, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:51.629851', 'step': 8137, 'epoch': 2} {'type': 'loss', 'content': 0.13611342012882233, 'timestamp': '2025-10-01 04:22:51.638198', 'step': 8138, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:51.680341', 'step': 8138, 'epoch': 2} {'type': 'loss', 'content': 0.1242586076259613, 'timestamp': '2025-10-01 04:22:51.683316', 'step': 8139, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:22:51.713004', 'step': 8139, 'epoch': 2} {'type': 'loss', 'content': 0.11299102008342743, 'timestamp': '2025-10-01 04:22:51.736484', 'step': 8140, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:51.766207', 'step': 8140, 'epoch': 2} {'type': 'loss', 'content': 0.1124926432967186, 'timestamp': '2025-10-01 04:22:51.768293', 'step': 8141, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:51.798328', 'step': 8141, 'epoch': 2} {'type': 'loss', 'content': 0.1285298764705658, 'timestamp': '2025-10-01 04:22:51.800469', 'step': 8142, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:51.854846', 'step': 8142, 'epoch': 2} {'type': 'loss', 'content': 0.1408001035451889, 'timestamp': '2025-10-01 04:22:51.858982', 'step': 8143, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:51.888573', 'step': 8143, 'epoch': 2} {'type': 'loss', 'content': 0.06969181448221207, 'timestamp': '2025-10-01 04:22:51.913715', 'step': 8144, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:51.943558', 'step': 8144, 'epoch': 2} {'type': 'loss', 'content': 0.15933451056480408, 'timestamp': '2025-10-01 04:22:51.945747', 'step': 8145, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:51.975502', 'step': 8145, 'epoch': 2} {'type': 'loss', 'content': 0.12510134279727936, 'timestamp': '2025-10-01 04:22:51.979676', 'step': 8146, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:52.009927', 'step': 8146, 'epoch': 2} {'type': 'loss', 'content': 0.07570087909698486, 'timestamp': '2025-10-01 04:22:52.012161', 'step': 8147, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:52.041650', 'step': 8147, 'epoch': 2} {'type': 'loss', 'content': 0.09094048291444778, 'timestamp': '2025-10-01 04:22:52.065342', 'step': 8148, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:52.097063', 'step': 8148, 'epoch': 2} {'type': 'loss', 'content': 0.15897132456302643, 'timestamp': '2025-10-01 04:22:52.107616', 'step': 8149, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:22:52.141059', 'step': 8149, 'epoch': 2} {'type': 'loss', 'content': 0.18377864360809326, 'timestamp': '2025-10-01 04:22:52.143982', 'step': 8150, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:52.178595', 'step': 8150, 'epoch': 2} {'type': 'loss', 'content': 0.14468173682689667, 'timestamp': '2025-10-01 04:22:52.180779', 'step': 8151, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:52.217792', 'step': 8151, 'epoch': 2} {'type': 'loss', 'content': 0.13975471258163452, 'timestamp': '2025-10-01 04:22:52.241515', 'step': 8152, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:52.273099', 'step': 8152, 'epoch': 2} {'type': 'loss', 'content': 0.1647852212190628, 'timestamp': '2025-10-01 04:22:52.275318', 'step': 8153, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:52.305665', 'step': 8153, 'epoch': 2} {'type': 'loss', 'content': 0.2449820190668106, 'timestamp': '2025-10-01 04:22:52.308948', 'step': 8154, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:52.349427', 'step': 8154, 'epoch': 2} {'type': 'loss', 'content': 0.17184790968894958, 'timestamp': '2025-10-01 04:22:52.351530', 'step': 8155, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:52.382287', 'step': 8155, 'epoch': 2} {'type': 'loss', 'content': 0.11127122491598129, 'timestamp': '2025-10-01 04:22:52.405978', 'step': 8156, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:52.436419', 'step': 8156, 'epoch': 2} {'type': 'loss', 'content': 0.13168016076087952, 'timestamp': '2025-10-01 04:22:52.438264', 'step': 8157, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:22:52.470300', 'step': 8157, 'epoch': 2} {'type': 'loss', 'content': 0.1142001748085022, 'timestamp': '2025-10-01 04:22:52.472645', 'step': 8158, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:52.503179', 'step': 8158, 'epoch': 2} {'type': 'loss', 'content': 0.14810775220394135, 'timestamp': '2025-10-01 04:22:52.505468', 'step': 8159, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:52.535923', 'step': 8159, 'epoch': 2} {'type': 'loss', 'content': 0.07040843367576599, 'timestamp': '2025-10-01 04:22:52.559508', 'step': 8160, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:52.593928', 'step': 8160, 'epoch': 2} {'type': 'loss', 'content': 0.07864443957805634, 'timestamp': '2025-10-01 04:22:52.596110', 'step': 8161, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:52.629060', 'step': 8161, 'epoch': 2} {'type': 'loss', 'content': 0.052861642092466354, 'timestamp': '2025-10-01 04:22:52.631210', 'step': 8162, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:52.661414', 'step': 8162, 'epoch': 2} {'type': 'loss', 'content': 0.14223045110702515, 'timestamp': '2025-10-01 04:22:52.664418', 'step': 8163, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:52.694077', 'step': 8163, 'epoch': 2} {'type': 'loss', 'content': 0.21304993331432343, 'timestamp': '2025-10-01 04:22:52.718013', 'step': 8164, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:52.747896', 'step': 8164, 'epoch': 2} {'type': 'loss', 'content': 0.14023959636688232, 'timestamp': '2025-10-01 04:22:52.749925', 'step': 8165, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:52.780271', 'step': 8165, 'epoch': 2} {'type': 'loss', 'content': 0.16877420246601105, 'timestamp': '2025-10-01 04:22:52.782413', 'step': 8166, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:22:52.812965', 'step': 8166, 'epoch': 2} {'type': 'loss', 'content': 0.2257883995771408, 'timestamp': '2025-10-01 04:22:52.815211', 'step': 8167, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:52.845873', 'step': 8167, 'epoch': 2} {'type': 'loss', 'content': 0.09765148907899857, 'timestamp': '2025-10-01 04:22:52.869443', 'step': 8168, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:52.901798', 'step': 8168, 'epoch': 2} {'type': 'loss', 'content': 0.08501017838716507, 'timestamp': '2025-10-01 04:22:52.903756', 'step': 8169, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:52.934291', 'step': 8169, 'epoch': 2} {'type': 'loss', 'content': 0.11523189395666122, 'timestamp': '2025-10-01 04:22:52.936282', 'step': 8170, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:52.965946', 'step': 8170, 'epoch': 2} {'type': 'loss', 'content': 0.14811056852340698, 'timestamp': '2025-10-01 04:22:52.968887', 'step': 8171, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:52.999245', 'step': 8171, 'epoch': 2} {'type': 'loss', 'content': 0.11274971812963486, 'timestamp': '2025-10-01 04:22:53.022823', 'step': 8172, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:22:53.052693', 'step': 8172, 'epoch': 2} {'type': 'loss', 'content': 0.11034140735864639, 'timestamp': '2025-10-01 04:22:53.055253', 'step': 8173, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:53.085735', 'step': 8173, 'epoch': 2} {'type': 'loss', 'content': 0.1526539921760559, 'timestamp': '2025-10-01 04:22:53.087626', 'step': 8174, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:53.117855', 'step': 8174, 'epoch': 2} {'type': 'loss', 'content': 0.09229067713022232, 'timestamp': '2025-10-01 04:22:53.119954', 'step': 8175, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:53.150106', 'step': 8175, 'epoch': 2} {'type': 'loss', 'content': 0.09183469414710999, 'timestamp': '2025-10-01 04:22:53.173975', 'step': 8176, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:22:53.204961', 'step': 8176, 'epoch': 2} {'type': 'loss', 'content': 0.041941773146390915, 'timestamp': '2025-10-01 04:22:53.207756', 'step': 8177, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:53.237859', 'step': 8177, 'epoch': 2} {'type': 'loss', 'content': 0.15351946651935577, 'timestamp': '2025-10-01 04:22:53.240204', 'step': 8178, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:53.269824', 'step': 8178, 'epoch': 2} {'type': 'loss', 'content': 0.16031022369861603, 'timestamp': '2025-10-01 04:22:53.272360', 'step': 8179, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:53.302324', 'step': 8179, 'epoch': 2} {'type': 'loss', 'content': 0.06367288529872894, 'timestamp': '2025-10-01 04:22:53.325931', 'step': 8180, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:53.356120', 'step': 8180, 'epoch': 2} {'type': 'loss', 'content': 0.06968310475349426, 'timestamp': '2025-10-01 04:22:53.358287', 'step': 8181, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:53.389007', 'step': 8181, 'epoch': 2} {'type': 'loss', 'content': 0.15518909692764282, 'timestamp': '2025-10-01 04:22:53.390957', 'step': 8182, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:53.421535', 'step': 8182, 'epoch': 2} {'type': 'loss', 'content': 0.13049186766147614, 'timestamp': '2025-10-01 04:22:53.423579', 'step': 8183, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:22:53.453945', 'step': 8183, 'epoch': 2} {'type': 'loss', 'content': 0.03816722705960274, 'timestamp': '2025-10-01 04:22:53.477697', 'step': 8184, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:53.508333', 'step': 8184, 'epoch': 2} {'type': 'loss', 'content': 0.1296631097793579, 'timestamp': '2025-10-01 04:22:53.510581', 'step': 8185, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:53.541275', 'step': 8185, 'epoch': 2} {'type': 'loss', 'content': 0.12940889596939087, 'timestamp': '2025-10-01 04:22:53.543549', 'step': 8186, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:53.579626', 'step': 8186, 'epoch': 2} {'type': 'loss', 'content': 0.1360541135072708, 'timestamp': '2025-10-01 04:22:53.582470', 'step': 8187, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:53.613367', 'step': 8187, 'epoch': 2} {'type': 'loss', 'content': 0.059581514447927475, 'timestamp': '2025-10-01 04:22:53.637820', 'step': 8188, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:53.668612', 'step': 8188, 'epoch': 2} {'type': 'loss', 'content': 0.1250019669532776, 'timestamp': '2025-10-01 04:22:53.678743', 'step': 8189, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:53.708971', 'step': 8189, 'epoch': 2} {'type': 'loss', 'content': 0.17052584886550903, 'timestamp': '2025-10-01 04:22:53.711465', 'step': 8190, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:53.742955', 'step': 8190, 'epoch': 2} {'type': 'loss', 'content': 0.1265021562576294, 'timestamp': '2025-10-01 04:22:53.745397', 'step': 8191, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:53.776210', 'step': 8191, 'epoch': 2} {'type': 'loss', 'content': 0.10317804664373398, 'timestamp': '2025-10-01 04:22:53.800239', 'step': 8192, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:53.830457', 'step': 8192, 'epoch': 2} {'type': 'loss', 'content': 0.15772634744644165, 'timestamp': '2025-10-01 04:22:53.832757', 'step': 8193, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:53.863582', 'step': 8193, 'epoch': 2} {'type': 'loss', 'content': 0.1673537641763687, 'timestamp': '2025-10-01 04:22:53.865915', 'step': 8194, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:53.896546', 'step': 8194, 'epoch': 2} {'type': 'loss', 'content': 0.20702187716960907, 'timestamp': '2025-10-01 04:22:53.899069', 'step': 8195, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:22:53.928845', 'step': 8195, 'epoch': 2} {'type': 'loss', 'content': 0.07077605277299881, 'timestamp': '2025-10-01 04:22:53.953266', 'step': 8196, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:22:53.983983', 'step': 8196, 'epoch': 2} {'type': 'loss', 'content': 0.13477878272533417, 'timestamp': '2025-10-01 04:22:53.987144', 'step': 8197, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:54.017803', 'step': 8197, 'epoch': 2} {'type': 'loss', 'content': 0.09533080458641052, 'timestamp': '2025-10-01 04:22:54.020291', 'step': 8198, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:54.050875', 'step': 8198, 'epoch': 2} {'type': 'loss', 'content': 0.1208794042468071, 'timestamp': '2025-10-01 04:22:54.054685', 'step': 8199, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:54.085512', 'step': 8199, 'epoch': 2} {'type': 'loss', 'content': 0.043915756046772, 'timestamp': '2025-10-01 04:22:54.109369', 'step': 8200, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:54.140212', 'step': 8200, 'epoch': 2} {'type': 'loss', 'content': 0.11539649963378906, 'timestamp': '2025-10-01 04:22:54.142885', 'step': 8201, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:54.172977', 'step': 8201, 'epoch': 2} {'type': 'loss', 'content': 0.10060971230268478, 'timestamp': '2025-10-01 04:22:54.175541', 'step': 8202, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:54.205874', 'step': 8202, 'epoch': 2} {'type': 'loss', 'content': 0.23500385880470276, 'timestamp': '2025-10-01 04:22:54.210693', 'step': 8203, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:54.250037', 'step': 8203, 'epoch': 2} {'type': 'loss', 'content': 0.14836211502552032, 'timestamp': '2025-10-01 04:22:54.273997', 'step': 8204, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:54.304447', 'step': 8204, 'epoch': 2} {'type': 'loss', 'content': 0.05069867521524429, 'timestamp': '2025-10-01 04:22:54.306736', 'step': 8205, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:54.337071', 'step': 8205, 'epoch': 2} {'type': 'loss', 'content': 0.09998000413179398, 'timestamp': '2025-10-01 04:22:54.339880', 'step': 8206, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:22:54.380042', 'step': 8206, 'epoch': 2} {'type': 'loss', 'content': 0.15026871860027313, 'timestamp': '2025-10-01 04:22:54.382519', 'step': 8207, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:54.413393', 'step': 8207, 'epoch': 2} {'type': 'loss', 'content': 0.15671643614768982, 'timestamp': '2025-10-01 04:22:54.437234', 'step': 8208, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:22:54.467372', 'step': 8208, 'epoch': 2} {'type': 'loss', 'content': 0.12371954321861267, 'timestamp': '2025-10-01 04:22:54.470636', 'step': 8209, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:54.500682', 'step': 8209, 'epoch': 2} {'type': 'loss', 'content': 0.13173902034759521, 'timestamp': '2025-10-01 04:22:54.503194', 'step': 8210, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:54.533977', 'step': 8210, 'epoch': 2} {'type': 'loss', 'content': 0.15452486276626587, 'timestamp': '2025-10-01 04:22:54.536359', 'step': 8211, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:54.566386', 'step': 8211, 'epoch': 2} {'type': 'loss', 'content': 0.16582919657230377, 'timestamp': '2025-10-01 04:22:54.590336', 'step': 8212, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:54.623038', 'step': 8212, 'epoch': 2} {'type': 'loss', 'content': 0.14490985870361328, 'timestamp': '2025-10-01 04:22:54.626069', 'step': 8213, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:54.656295', 'step': 8213, 'epoch': 2} {'type': 'loss', 'content': 0.1860407292842865, 'timestamp': '2025-10-01 04:22:54.658594', 'step': 8214, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:54.690031', 'step': 8214, 'epoch': 2} {'type': 'loss', 'content': 0.13126495480537415, 'timestamp': '2025-10-01 04:22:54.692251', 'step': 8215, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:54.724058', 'step': 8215, 'epoch': 2} {'type': 'loss', 'content': 0.10489366203546524, 'timestamp': '2025-10-01 04:22:54.748020', 'step': 8216, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:54.778496', 'step': 8216, 'epoch': 2} {'type': 'loss', 'content': 0.13471123576164246, 'timestamp': '2025-10-01 04:22:54.781259', 'step': 8217, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:54.813324', 'step': 8217, 'epoch': 2} {'type': 'loss', 'content': 0.2690100073814392, 'timestamp': '2025-10-01 04:22:54.815913', 'step': 8218, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:54.846506', 'step': 8218, 'epoch': 2} {'type': 'loss', 'content': 0.0500451922416687, 'timestamp': '2025-10-01 04:22:54.849937', 'step': 8219, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:54.880484', 'step': 8219, 'epoch': 2} {'type': 'loss', 'content': 0.06335490196943283, 'timestamp': '2025-10-01 04:22:54.904469', 'step': 8220, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:54.934893', 'step': 8220, 'epoch': 2} {'type': 'loss', 'content': 0.08293285965919495, 'timestamp': '2025-10-01 04:22:54.937257', 'step': 8221, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:54.979700', 'step': 8221, 'epoch': 2} {'type': 'loss', 'content': 0.09673890471458435, 'timestamp': '2025-10-01 04:22:54.982018', 'step': 8222, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:55.012523', 'step': 8222, 'epoch': 2} {'type': 'loss', 'content': 0.17153891921043396, 'timestamp': '2025-10-01 04:22:55.014536', 'step': 8223, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:55.046929', 'step': 8223, 'epoch': 2} {'type': 'loss', 'content': 0.05854056775569916, 'timestamp': '2025-10-01 04:22:55.070644', 'step': 8224, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:55.115387', 'step': 8224, 'epoch': 2} {'type': 'loss', 'content': 0.19125734269618988, 'timestamp': '2025-10-01 04:22:55.117529', 'step': 8225, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:55.153296', 'step': 8225, 'epoch': 2} {'type': 'loss', 'content': 0.15331684052944183, 'timestamp': '2025-10-01 04:22:55.155418', 'step': 8226, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:22:55.193778', 'step': 8226, 'epoch': 2} {'type': 'loss', 'content': 0.06817185133695602, 'timestamp': '2025-10-01 04:22:55.195847', 'step': 8227, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:55.233680', 'step': 8227, 'epoch': 2} {'type': 'loss', 'content': 0.15217985212802887, 'timestamp': '2025-10-01 04:22:55.257162', 'step': 8228, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:55.297528', 'step': 8228, 'epoch': 2} {'type': 'loss', 'content': 0.1292182207107544, 'timestamp': '2025-10-01 04:22:55.300085', 'step': 8229, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:55.337484', 'step': 8229, 'epoch': 2} {'type': 'loss', 'content': 0.09909120947122574, 'timestamp': '2025-10-01 04:22:55.339645', 'step': 8230, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:55.371170', 'step': 8230, 'epoch': 2} {'type': 'loss', 'content': 0.10817278176546097, 'timestamp': '2025-10-01 04:22:55.374042', 'step': 8231, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:22:55.406456', 'step': 8231, 'epoch': 2} {'type': 'loss', 'content': 0.04983855038881302, 'timestamp': '2025-10-01 04:22:55.429891', 'step': 8232, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:55.461184', 'step': 8232, 'epoch': 2} {'type': 'loss', 'content': 0.11342354863882065, 'timestamp': '2025-10-01 04:22:55.463544', 'step': 8233, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:55.508526', 'step': 8233, 'epoch': 2} {'type': 'loss', 'content': 0.12888464331626892, 'timestamp': '2025-10-01 04:22:55.510430', 'step': 8234, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:22:55.541797', 'step': 8234, 'epoch': 2} {'type': 'loss', 'content': 0.06885817646980286, 'timestamp': '2025-10-01 04:22:55.544075', 'step': 8235, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:55.574798', 'step': 8235, 'epoch': 2} {'type': 'loss', 'content': 0.11177368462085724, 'timestamp': '2025-10-01 04:22:55.598260', 'step': 8236, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:55.636857', 'step': 8236, 'epoch': 2} {'type': 'loss', 'content': 0.17971643805503845, 'timestamp': '2025-10-01 04:22:55.639023', 'step': 8237, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:22:55.682994', 'step': 8237, 'epoch': 2} {'type': 'loss', 'content': 0.041805338114500046, 'timestamp': '2025-10-01 04:22:55.684991', 'step': 8238, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:55.730121', 'step': 8238, 'epoch': 2} {'type': 'loss', 'content': 0.13847500085830688, 'timestamp': '2025-10-01 04:22:55.732216', 'step': 8239, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:55.766973', 'step': 8239, 'epoch': 2} {'type': 'loss', 'content': 0.15792277455329895, 'timestamp': '2025-10-01 04:22:55.791821', 'step': 8240, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:55.858940', 'step': 8240, 'epoch': 2} {'type': 'loss', 'content': 0.05988914147019386, 'timestamp': '2025-10-01 04:22:55.860889', 'step': 8241, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:22:55.932538', 'step': 8241, 'epoch': 2} {'type': 'loss', 'content': 0.11336121708154678, 'timestamp': '2025-10-01 04:22:55.934911', 'step': 8242, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:55.968221', 'step': 8242, 'epoch': 2} {'type': 'loss', 'content': 0.10238942503929138, 'timestamp': '2025-10-01 04:22:55.974023', 'step': 8243, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:22:56.014913', 'step': 8243, 'epoch': 2} {'type': 'loss', 'content': 0.12415177375078201, 'timestamp': '2025-10-01 04:22:56.038689', 'step': 8244, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:56.076820', 'step': 8244, 'epoch': 2} {'type': 'loss', 'content': 0.09616179019212723, 'timestamp': '2025-10-01 04:22:56.078545', 'step': 8245, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:56.123314', 'step': 8245, 'epoch': 2} {'type': 'loss', 'content': 0.20003953576087952, 'timestamp': '2025-10-01 04:22:56.125184', 'step': 8246, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:56.157538', 'step': 8246, 'epoch': 2} {'type': 'loss', 'content': 0.14938698709011078, 'timestamp': '2025-10-01 04:22:56.159955', 'step': 8247, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:56.197112', 'step': 8247, 'epoch': 2} {'type': 'loss', 'content': 0.08195056766271591, 'timestamp': '2025-10-01 04:22:56.220369', 'step': 8248, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:56.252579', 'step': 8248, 'epoch': 2} {'type': 'loss', 'content': 0.0636812299489975, 'timestamp': '2025-10-01 04:22:56.255183', 'step': 8249, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:22:56.285911', 'step': 8249, 'epoch': 2} {'type': 'loss', 'content': 0.1879834085702896, 'timestamp': '2025-10-01 04:22:56.288212', 'step': 8250, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:56.318870', 'step': 8250, 'epoch': 2} {'type': 'loss', 'content': 0.16599781811237335, 'timestamp': '2025-10-01 04:22:56.321599', 'step': 8251, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:56.360913', 'step': 8251, 'epoch': 2} {'type': 'loss', 'content': 0.10590384900569916, 'timestamp': '2025-10-01 04:22:56.387655', 'step': 8252, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:56.426505', 'step': 8252, 'epoch': 2} {'type': 'loss', 'content': 0.09861370176076889, 'timestamp': '2025-10-01 04:22:56.428544', 'step': 8253, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:56.460131', 'step': 8253, 'epoch': 2} {'type': 'loss', 'content': 0.17748677730560303, 'timestamp': '2025-10-01 04:22:56.463224', 'step': 8254, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:56.506361', 'step': 8254, 'epoch': 2} {'type': 'loss', 'content': 0.07002489268779755, 'timestamp': '2025-10-01 04:22:56.508305', 'step': 8255, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:56.547142', 'step': 8255, 'epoch': 2} {'type': 'loss', 'content': 0.10383627563714981, 'timestamp': '2025-10-01 04:22:56.572362', 'step': 8256, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:56.603825', 'step': 8256, 'epoch': 2} {'type': 'loss', 'content': 0.08440680056810379, 'timestamp': '2025-10-01 04:22:56.605719', 'step': 8257, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:56.638894', 'step': 8257, 'epoch': 2} {'type': 'loss', 'content': 0.06730026006698608, 'timestamp': '2025-10-01 04:22:56.643075', 'step': 8258, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:56.684193', 'step': 8258, 'epoch': 2} {'type': 'loss', 'content': 0.1220778375864029, 'timestamp': '2025-10-01 04:22:56.686127', 'step': 8259, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:56.729982', 'step': 8259, 'epoch': 2} {'type': 'loss', 'content': 0.17819994688034058, 'timestamp': '2025-10-01 04:22:56.753476', 'step': 8260, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:56.793916', 'step': 8260, 'epoch': 2} {'type': 'loss', 'content': 0.1486859917640686, 'timestamp': '2025-10-01 04:22:56.796129', 'step': 8261, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:56.838547', 'step': 8261, 'epoch': 2} {'type': 'loss', 'content': 0.11345463246107101, 'timestamp': '2025-10-01 04:22:56.841922', 'step': 8262, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:56.890123', 'step': 8262, 'epoch': 2} {'type': 'loss', 'content': 0.19791348278522491, 'timestamp': '2025-10-01 04:22:56.892267', 'step': 8263, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:56.942885', 'step': 8263, 'epoch': 2} {'type': 'loss', 'content': 0.08736348897218704, 'timestamp': '2025-10-01 04:22:56.966356', 'step': 8264, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:57.010237', 'step': 8264, 'epoch': 2} {'type': 'loss', 'content': 0.036233123391866684, 'timestamp': '2025-10-01 04:22:57.012401', 'step': 8265, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:57.045675', 'step': 8265, 'epoch': 2} {'type': 'loss', 'content': 0.1367216408252716, 'timestamp': '2025-10-01 04:22:57.049462', 'step': 8266, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:22:57.082445', 'step': 8266, 'epoch': 2} {'type': 'loss', 'content': 0.1299361139535904, 'timestamp': '2025-10-01 04:22:57.084939', 'step': 8267, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:22:57.124660', 'step': 8267, 'epoch': 2} {'type': 'loss', 'content': 0.15583080053329468, 'timestamp': '2025-10-01 04:22:57.148558', 'step': 8268, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:57.180360', 'step': 8268, 'epoch': 2} {'type': 'loss', 'content': 0.15992051362991333, 'timestamp': '2025-10-01 04:22:57.182612', 'step': 8269, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:57.217989', 'step': 8269, 'epoch': 2} {'type': 'loss', 'content': 0.12998788058757782, 'timestamp': '2025-10-01 04:22:57.219960', 'step': 8270, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:57.254041', 'step': 8270, 'epoch': 2} {'type': 'loss', 'content': 0.0846841111779213, 'timestamp': '2025-10-01 04:22:57.260778', 'step': 8271, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:57.296452', 'step': 8271, 'epoch': 2} {'type': 'loss', 'content': 0.2012147754430771, 'timestamp': '2025-10-01 04:22:57.319825', 'step': 8272, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:57.355390', 'step': 8272, 'epoch': 2} {'type': 'loss', 'content': 0.06352435052394867, 'timestamp': '2025-10-01 04:22:57.357458', 'step': 8273, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:57.388541', 'step': 8273, 'epoch': 2} {'type': 'loss', 'content': 0.11762823909521103, 'timestamp': '2025-10-01 04:22:57.390582', 'step': 8274, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:22:57.428170', 'step': 8274, 'epoch': 2} {'type': 'loss', 'content': 0.09369291365146637, 'timestamp': '2025-10-01 04:22:57.432498', 'step': 8275, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:57.463033', 'step': 8275, 'epoch': 2} {'type': 'loss', 'content': 0.07989942282438278, 'timestamp': '2025-10-01 04:22:57.486375', 'step': 8276, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:57.525164', 'step': 8276, 'epoch': 2} {'type': 'loss', 'content': 0.16932855546474457, 'timestamp': '2025-10-01 04:22:57.527215', 'step': 8277, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:57.566951', 'step': 8277, 'epoch': 2} {'type': 'loss', 'content': 0.12584267556667328, 'timestamp': '2025-10-01 04:22:57.568806', 'step': 8278, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:57.608154', 'step': 8278, 'epoch': 2} {'type': 'loss', 'content': 0.16318108141422272, 'timestamp': '2025-10-01 04:22:57.611198', 'step': 8279, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:57.644600', 'step': 8279, 'epoch': 2} {'type': 'loss', 'content': 0.11215130239725113, 'timestamp': '2025-10-01 04:22:57.668196', 'step': 8280, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:22:57.699153', 'step': 8280, 'epoch': 2} {'type': 'loss', 'content': 0.1371694952249527, 'timestamp': '2025-10-01 04:22:57.701335', 'step': 8281, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:57.738624', 'step': 8281, 'epoch': 2} {'type': 'loss', 'content': 0.09247235953807831, 'timestamp': '2025-10-01 04:22:57.741529', 'step': 8282, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:57.780887', 'step': 8282, 'epoch': 2} {'type': 'loss', 'content': 0.10142849385738373, 'timestamp': '2025-10-01 04:22:57.783030', 'step': 8283, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:57.815629', 'step': 8283, 'epoch': 2} {'type': 'loss', 'content': 0.08615856617689133, 'timestamp': '2025-10-01 04:22:57.839495', 'step': 8284, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:22:57.875107', 'step': 8284, 'epoch': 2} {'type': 'loss', 'content': 0.21430975198745728, 'timestamp': '2025-10-01 04:22:57.877098', 'step': 8285, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:57.909408', 'step': 8285, 'epoch': 2} {'type': 'loss', 'content': 0.08748549222946167, 'timestamp': '2025-10-01 04:22:57.911924', 'step': 8286, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:57.942644', 'step': 8286, 'epoch': 2} {'type': 'loss', 'content': 0.2405921071767807, 'timestamp': '2025-10-01 04:22:57.951065', 'step': 8287, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:57.985593', 'step': 8287, 'epoch': 2} {'type': 'loss', 'content': 0.16880548000335693, 'timestamp': '2025-10-01 04:22:58.010064', 'step': 8288, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:22:58.039976', 'step': 8288, 'epoch': 2} {'type': 'loss', 'content': 0.1102624461054802, 'timestamp': '2025-10-01 04:22:58.041865', 'step': 8289, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:58.072712', 'step': 8289, 'epoch': 2} {'type': 'loss', 'content': 0.1187213659286499, 'timestamp': '2025-10-01 04:22:58.074823', 'step': 8290, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:58.105014', 'step': 8290, 'epoch': 2} {'type': 'loss', 'content': 0.15514761209487915, 'timestamp': '2025-10-01 04:22:58.107053', 'step': 8291, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:58.140843', 'step': 8291, 'epoch': 2} {'type': 'loss', 'content': 0.11657886207103729, 'timestamp': '2025-10-01 04:22:58.164261', 'step': 8292, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:58.197630', 'step': 8292, 'epoch': 2} {'type': 'loss', 'content': 0.06029637157917023, 'timestamp': '2025-10-01 04:22:58.199778', 'step': 8293, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:58.232801', 'step': 8293, 'epoch': 2} {'type': 'loss', 'content': 0.12044262886047363, 'timestamp': '2025-10-01 04:22:58.234872', 'step': 8294, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:22:58.267051', 'step': 8294, 'epoch': 2} {'type': 'loss', 'content': 0.07202025502920151, 'timestamp': '2025-10-01 04:22:58.269517', 'step': 8295, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:22:58.301371', 'step': 8295, 'epoch': 2} {'type': 'loss', 'content': 0.09289614856243134, 'timestamp': '2025-10-01 04:22:58.324960', 'step': 8296, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:58.356038', 'step': 8296, 'epoch': 2} {'type': 'loss', 'content': 0.15782897174358368, 'timestamp': '2025-10-01 04:22:58.366036', 'step': 8297, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:22:58.397855', 'step': 8297, 'epoch': 2} {'type': 'loss', 'content': 0.1275893896818161, 'timestamp': '2025-10-01 04:22:58.400188', 'step': 8298, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:58.432864', 'step': 8298, 'epoch': 2} {'type': 'loss', 'content': 0.25016868114471436, 'timestamp': '2025-10-01 04:22:58.434782', 'step': 8299, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:58.465291', 'step': 8299, 'epoch': 2} {'type': 'loss', 'content': 0.16100116074085236, 'timestamp': '2025-10-01 04:22:58.488769', 'step': 8300, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:58.519629', 'step': 8300, 'epoch': 2} {'type': 'loss', 'content': 0.173725888133049, 'timestamp': '2025-10-01 04:22:58.521587', 'step': 8301, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:58.554761', 'step': 8301, 'epoch': 2} {'type': 'loss', 'content': 0.061653684824705124, 'timestamp': '2025-10-01 04:22:58.556642', 'step': 8302, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:58.587720', 'step': 8302, 'epoch': 2} {'type': 'loss', 'content': 0.16033931076526642, 'timestamp': '2025-10-01 04:22:58.589785', 'step': 8303, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:58.619686', 'step': 8303, 'epoch': 2} {'type': 'loss', 'content': 0.13706016540527344, 'timestamp': '2025-10-01 04:22:58.642985', 'step': 8304, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:58.674076', 'step': 8304, 'epoch': 2} {'type': 'loss', 'content': 0.12459015101194382, 'timestamp': '2025-10-01 04:22:58.676460', 'step': 8305, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:58.708810', 'step': 8305, 'epoch': 2} {'type': 'loss', 'content': 0.13863122463226318, 'timestamp': '2025-10-01 04:22:58.710751', 'step': 8306, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:58.746211', 'step': 8306, 'epoch': 2} {'type': 'loss', 'content': 0.08764716237783432, 'timestamp': '2025-10-01 04:22:58.748181', 'step': 8307, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:22:58.779225', 'step': 8307, 'epoch': 2} {'type': 'loss', 'content': 0.13779981434345245, 'timestamp': '2025-10-01 04:22:58.803056', 'step': 8308, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:58.835036', 'step': 8308, 'epoch': 2} {'type': 'loss', 'content': 0.10528746992349625, 'timestamp': '2025-10-01 04:22:58.836990', 'step': 8309, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:58.867792', 'step': 8309, 'epoch': 2} {'type': 'loss', 'content': 0.09673253446817398, 'timestamp': '2025-10-01 04:22:58.869786', 'step': 8310, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:58.910232', 'step': 8310, 'epoch': 2} {'type': 'loss', 'content': 0.15200288593769073, 'timestamp': '2025-10-01 04:22:58.912234', 'step': 8311, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:58.942690', 'step': 8311, 'epoch': 2} {'type': 'loss', 'content': 0.11327975988388062, 'timestamp': '2025-10-01 04:22:58.966349', 'step': 8312, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:58.997557', 'step': 8312, 'epoch': 2} {'type': 'loss', 'content': 0.029338626191020012, 'timestamp': '2025-10-01 04:22:58.999608', 'step': 8313, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:22:59.029876', 'step': 8313, 'epoch': 2} {'type': 'loss', 'content': 0.13822504878044128, 'timestamp': '2025-10-01 04:22:59.032210', 'step': 8314, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:59.062376', 'step': 8314, 'epoch': 2} {'type': 'loss', 'content': 0.14109481871128082, 'timestamp': '2025-10-01 04:22:59.064461', 'step': 8315, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:59.095557', 'step': 8315, 'epoch': 2} {'type': 'loss', 'content': 0.13302969932556152, 'timestamp': '2025-10-01 04:22:59.122622', 'step': 8316, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:59.153531', 'step': 8316, 'epoch': 2} {'type': 'loss', 'content': 0.11413350701332092, 'timestamp': '2025-10-01 04:22:59.155552', 'step': 8317, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:59.191612', 'step': 8317, 'epoch': 2} {'type': 'loss', 'content': 0.17501501739025116, 'timestamp': '2025-10-01 04:22:59.193767', 'step': 8318, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:59.224286', 'step': 8318, 'epoch': 2} {'type': 'loss', 'content': 0.06505771726369858, 'timestamp': '2025-10-01 04:22:59.226864', 'step': 8319, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:22:59.256317', 'step': 8319, 'epoch': 2} {'type': 'loss', 'content': 0.11682116985321045, 'timestamp': '2025-10-01 04:22:59.280788', 'step': 8320, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:22:59.310889', 'step': 8320, 'epoch': 2} {'type': 'loss', 'content': 0.07520481199026108, 'timestamp': '2025-10-01 04:22:59.312774', 'step': 8321, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:59.342251', 'step': 8321, 'epoch': 2} {'type': 'loss', 'content': 0.06388846039772034, 'timestamp': '2025-10-01 04:22:59.344360', 'step': 8322, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:22:59.375018', 'step': 8322, 'epoch': 2} {'type': 'loss', 'content': 0.23120170831680298, 'timestamp': '2025-10-01 04:22:59.377483', 'step': 8323, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:22:59.408546', 'step': 8323, 'epoch': 2} {'type': 'loss', 'content': 0.0871802493929863, 'timestamp': '2025-10-01 04:22:59.433420', 'step': 8324, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:59.462706', 'step': 8324, 'epoch': 2} {'type': 'loss', 'content': 0.1618255078792572, 'timestamp': '2025-10-01 04:22:59.464565', 'step': 8325, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:59.494088', 'step': 8325, 'epoch': 2} {'type': 'loss', 'content': 0.17737464606761932, 'timestamp': '2025-10-01 04:22:59.496167', 'step': 8326, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:59.527257', 'step': 8326, 'epoch': 2} {'type': 'loss', 'content': 0.1256895661354065, 'timestamp': '2025-10-01 04:22:59.529598', 'step': 8327, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:59.564044', 'step': 8327, 'epoch': 2} {'type': 'loss', 'content': 0.1973111778497696, 'timestamp': '2025-10-01 04:22:59.591951', 'step': 8328, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:59.629164', 'step': 8328, 'epoch': 2} {'type': 'loss', 'content': 0.1404000222682953, 'timestamp': '2025-10-01 04:22:59.637847', 'step': 8329, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:59.669986', 'step': 8329, 'epoch': 2} {'type': 'loss', 'content': 0.15203168988227844, 'timestamp': '2025-10-01 04:22:59.672022', 'step': 8330, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:59.701983', 'step': 8330, 'epoch': 2} {'type': 'loss', 'content': 0.05926462262868881, 'timestamp': '2025-10-01 04:22:59.704072', 'step': 8331, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:22:59.736899', 'step': 8331, 'epoch': 2} {'type': 'loss', 'content': 0.130477637052536, 'timestamp': '2025-10-01 04:22:59.760290', 'step': 8332, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:22:59.791499', 'step': 8332, 'epoch': 2} {'type': 'loss', 'content': 0.08716201037168503, 'timestamp': '2025-10-01 04:22:59.793442', 'step': 8333, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:59.823379', 'step': 8333, 'epoch': 2} {'type': 'loss', 'content': 0.20210544764995575, 'timestamp': '2025-10-01 04:22:59.825347', 'step': 8334, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:59.855086', 'step': 8334, 'epoch': 2} {'type': 'loss', 'content': 0.09811016917228699, 'timestamp': '2025-10-01 04:22:59.856894', 'step': 8335, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:22:59.894101', 'step': 8335, 'epoch': 2} {'type': 'loss', 'content': 0.09228481352329254, 'timestamp': '2025-10-01 04:22:59.917682', 'step': 8336, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:22:59.953908', 'step': 8336, 'epoch': 2} {'type': 'loss', 'content': 0.11485680937767029, 'timestamp': '2025-10-01 04:22:59.955769', 'step': 8337, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:22:59.986806', 'step': 8337, 'epoch': 2} {'type': 'loss', 'content': 0.13890810310840607, 'timestamp': '2025-10-01 04:22:59.988871', 'step': 8338, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:00.019673', 'step': 8338, 'epoch': 2} {'type': 'loss', 'content': 0.11428594589233398, 'timestamp': '2025-10-01 04:23:00.021478', 'step': 8339, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:00.064609', 'step': 8339, 'epoch': 2} {'type': 'loss', 'content': 0.07732100039720535, 'timestamp': '2025-10-01 04:23:00.087997', 'step': 8340, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:23:00.118037', 'step': 8340, 'epoch': 2} {'type': 'loss', 'content': 0.06918429583311081, 'timestamp': '2025-10-01 04:23:00.119972', 'step': 8341, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:23:00.149680', 'step': 8341, 'epoch': 2} {'type': 'loss', 'content': 0.15802964568138123, 'timestamp': '2025-10-01 04:23:00.151763', 'step': 8342, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:23:00.181677', 'step': 8342, 'epoch': 2} {'type': 'loss', 'content': 0.10954111814498901, 'timestamp': '2025-10-01 04:23:00.183483', 'step': 8343, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:23:00.214053', 'step': 8343, 'epoch': 2} {'type': 'loss', 'content': 0.1462879329919815, 'timestamp': '2025-10-01 04:23:00.237507', 'step': 8344, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:00.266896', 'step': 8344, 'epoch': 2} {'type': 'loss', 'content': 0.09935696423053741, 'timestamp': '2025-10-01 04:23:00.268710', 'step': 8345, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:00.298230', 'step': 8345, 'epoch': 2} {'type': 'loss', 'content': 0.2505927085876465, 'timestamp': '2025-10-01 04:23:00.300134', 'step': 8346, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:23:00.330278', 'step': 8346, 'epoch': 2} {'type': 'loss', 'content': 0.1566290557384491, 'timestamp': '2025-10-01 04:23:00.333062', 'step': 8347, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:00.362828', 'step': 8347, 'epoch': 2} {'type': 'loss', 'content': 0.17414724826812744, 'timestamp': '2025-10-01 04:23:00.386301', 'step': 8348, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:00.415673', 'step': 8348, 'epoch': 2} {'type': 'loss', 'content': 0.2183787077665329, 'timestamp': '2025-10-01 04:23:00.417992', 'step': 8349, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:23:00.452636', 'step': 8349, 'epoch': 2} {'type': 'loss', 'content': 0.051296211779117584, 'timestamp': '2025-10-01 04:23:00.455041', 'step': 8350, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:00.486065', 'step': 8350, 'epoch': 2} {'type': 'loss', 'content': 0.14099562168121338, 'timestamp': '2025-10-01 04:23:00.487946', 'step': 8351, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:00.518106', 'step': 8351, 'epoch': 2} {'type': 'loss', 'content': 0.0993710458278656, 'timestamp': '2025-10-01 04:23:00.541538', 'step': 8352, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:23:00.577705', 'step': 8352, 'epoch': 2} {'type': 'loss', 'content': 0.11976741254329681, 'timestamp': '2025-10-01 04:23:00.586130', 'step': 8353, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:23:00.616912', 'step': 8353, 'epoch': 2} {'type': 'loss', 'content': 0.1402173489332199, 'timestamp': '2025-10-01 04:23:00.619048', 'step': 8354, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:00.648626', 'step': 8354, 'epoch': 2} {'type': 'loss', 'content': 0.09802332520484924, 'timestamp': '2025-10-01 04:23:00.650742', 'step': 8355, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:00.681254', 'step': 8355, 'epoch': 2} {'type': 'loss', 'content': 0.11709311604499817, 'timestamp': '2025-10-01 04:23:00.704992', 'step': 8356, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:23:00.736832', 'step': 8356, 'epoch': 2} {'type': 'loss', 'content': 0.12309753149747849, 'timestamp': '2025-10-01 04:23:00.738910', 'step': 8357, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:23:00.768667', 'step': 8357, 'epoch': 2} {'type': 'loss', 'content': 0.11302945762872696, 'timestamp': '2025-10-01 04:23:00.775083', 'step': 8358, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:23:00.810533', 'step': 8358, 'epoch': 2} {'type': 'loss', 'content': 0.11472915858030319, 'timestamp': '2025-10-01 04:23:00.812604', 'step': 8359, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:00.842629', 'step': 8359, 'epoch': 2} {'type': 'loss', 'content': 0.2514374554157257, 'timestamp': '2025-10-01 04:23:00.866097', 'step': 8360, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:23:00.895426', 'step': 8360, 'epoch': 2} {'type': 'loss', 'content': 0.08998175710439682, 'timestamp': '2025-10-01 04:23:00.897708', 'step': 8361, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:00.931030', 'step': 8361, 'epoch': 2} {'type': 'loss', 'content': 0.09962949901819229, 'timestamp': '2025-10-01 04:23:00.933280', 'step': 8362, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:23:00.962984', 'step': 8362, 'epoch': 2} {'type': 'loss', 'content': 0.07804112136363983, 'timestamp': '2025-10-01 04:23:00.965287', 'step': 8363, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:00.996607', 'step': 8363, 'epoch': 2} {'type': 'loss', 'content': 0.09268379956483841, 'timestamp': '2025-10-01 04:23:01.022302', 'step': 8364, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:23:01.053162', 'step': 8364, 'epoch': 2} {'type': 'loss', 'content': 0.15355299413204193, 'timestamp': '2025-10-01 04:23:01.055226', 'step': 8365, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:23:01.085586', 'step': 8365, 'epoch': 2} {'type': 'loss', 'content': 0.16714154183864594, 'timestamp': '2025-10-01 04:23:01.088039', 'step': 8366, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:01.118802', 'step': 8366, 'epoch': 2} {'type': 'loss', 'content': 0.1272413283586502, 'timestamp': '2025-10-01 04:23:01.120766', 'step': 8367, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:23:01.152365', 'step': 8367, 'epoch': 2} {'type': 'loss', 'content': 0.08453300595283508, 'timestamp': '2025-10-01 04:23:01.176134', 'step': 8368, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:23:01.206758', 'step': 8368, 'epoch': 2} {'type': 'loss', 'content': 0.14448118209838867, 'timestamp': '2025-10-01 04:23:01.208949', 'step': 8369, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:01.239043', 'step': 8369, 'epoch': 2} {'type': 'loss', 'content': 0.12527310848236084, 'timestamp': '2025-10-01 04:23:01.241634', 'step': 8370, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:01.277473', 'step': 8370, 'epoch': 2} {'type': 'loss', 'content': 0.16815891861915588, 'timestamp': '2025-10-01 04:23:01.279885', 'step': 8371, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:23:01.309878', 'step': 8371, 'epoch': 2} {'type': 'loss', 'content': 0.1304021179676056, 'timestamp': '2025-10-01 04:23:01.333224', 'step': 8372, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:01.370415', 'step': 8372, 'epoch': 2} {'type': 'loss', 'content': 0.12546662986278534, 'timestamp': '2025-10-01 04:23:01.373083', 'step': 8373, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:01.403461', 'step': 8373, 'epoch': 2} {'type': 'loss', 'content': 0.09565796703100204, 'timestamp': '2025-10-01 04:23:01.406671', 'step': 8374, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-10-01 04:23:01.441122', 'step': 8374, 'epoch': 2} {'type': 'loss', 'content': 0.2411731779575348, 'timestamp': '2025-10-01 04:23:01.448550', 'step': 8375, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:23:01.481948', 'step': 8375, 'epoch': 2} {'type': 'loss', 'content': 0.12166458368301392, 'timestamp': '2025-10-01 04:23:01.505573', 'step': 8376, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:01.538383', 'step': 8376, 'epoch': 2} {'type': 'loss', 'content': 0.20422601699829102, 'timestamp': '2025-10-01 04:23:01.540899', 'step': 8377, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:01.570552', 'step': 8377, 'epoch': 2} {'type': 'loss', 'content': 0.1320718675851822, 'timestamp': '2025-10-01 04:23:01.575366', 'step': 8378, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:01.606114', 'step': 8378, 'epoch': 2} {'type': 'loss', 'content': 0.18135598301887512, 'timestamp': '2025-10-01 04:23:01.608137', 'step': 8379, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:01.639619', 'step': 8379, 'epoch': 2} {'type': 'loss', 'content': 0.11089888960123062, 'timestamp': '2025-10-01 04:23:01.663669', 'step': 8380, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:01.693331', 'step': 8380, 'epoch': 2} {'type': 'loss', 'content': 0.21638642251491547, 'timestamp': '2025-10-01 04:23:01.695228', 'step': 8381, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:01.725400', 'step': 8381, 'epoch': 2} {'type': 'loss', 'content': 0.13259322941303253, 'timestamp': '2025-10-01 04:23:01.727754', 'step': 8382, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:23:01.757537', 'step': 8382, 'epoch': 2} {'type': 'loss', 'content': 0.12787240743637085, 'timestamp': '2025-10-01 04:23:01.759455', 'step': 8383, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:23:01.789683', 'step': 8383, 'epoch': 2} {'type': 'loss', 'content': 0.1979699432849884, 'timestamp': '2025-10-01 04:23:01.813011', 'step': 8384, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:23:01.843028', 'step': 8384, 'epoch': 2} {'type': 'loss', 'content': 0.10137957334518433, 'timestamp': '2025-10-01 04:23:01.845415', 'step': 8385, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:01.876070', 'step': 8385, 'epoch': 2} {'type': 'loss', 'content': 0.22983048856258392, 'timestamp': '2025-10-01 04:23:01.878187', 'step': 8386, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:01.910731', 'step': 8386, 'epoch': 2} {'type': 'loss', 'content': 0.24460582435131073, 'timestamp': '2025-10-01 04:23:01.912775', 'step': 8387, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:23:01.944099', 'step': 8387, 'epoch': 2} {'type': 'loss', 'content': 0.10010193288326263, 'timestamp': '2025-10-01 04:23:01.967510', 'step': 8388, 'epoch': 2} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 949202279808}], 'timestamp': '2025-10-01 04:23:11.283870', 'step': 8388, 'epoch': 2} {'type': 'pplx', 'content': 12265.823729834887, 'timestamp': '2025-10-01 04:23:11.287762', 'step': 8388, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:23:11.317574', 'step': 8388, 'epoch': 2} {'type': 'loss', 'content': 0.11446291208267212, 'timestamp': '2025-10-01 04:23:11.319637', 'step': 8389, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:23:11.352227', 'step': 8389, 'epoch': 2} {'type': 'loss', 'content': 0.09007501602172852, 'timestamp': '2025-10-01 04:23:11.354472', 'step': 8390, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:11.390224', 'step': 8390, 'epoch': 2} {'type': 'loss', 'content': 0.06846612691879272, 'timestamp': '2025-10-01 04:23:11.392320', 'step': 8391, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:23:11.439251', 'step': 8391, 'epoch': 2} {'type': 'loss', 'content': 0.18546843528747559, 'timestamp': '2025-10-01 04:23:11.463283', 'step': 8392, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:11.496918', 'step': 8392, 'epoch': 2} {'type': 'loss', 'content': 0.08224210888147354, 'timestamp': '2025-10-01 04:23:11.498906', 'step': 8393, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:11.531280', 'step': 8393, 'epoch': 2} {'type': 'loss', 'content': 0.09150803089141846, 'timestamp': '2025-10-01 04:23:11.535823', 'step': 8394, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:11.569551', 'step': 8394, 'epoch': 2} {'type': 'loss', 'content': 0.12850511074066162, 'timestamp': '2025-10-01 04:23:11.571765', 'step': 8395, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:11.614024', 'step': 8395, 'epoch': 2} {'type': 'loss', 'content': 0.09443698078393936, 'timestamp': '2025-10-01 04:23:11.637610', 'step': 8396, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:11.671105', 'step': 8396, 'epoch': 2} {'type': 'loss', 'content': 0.1270783394575119, 'timestamp': '2025-10-01 04:23:11.672995', 'step': 8397, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:23:11.707503', 'step': 8397, 'epoch': 2} {'type': 'loss', 'content': 0.15967759490013123, 'timestamp': '2025-10-01 04:23:11.709479', 'step': 8398, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:23:11.741517', 'step': 8398, 'epoch': 2} {'type': 'loss', 'content': 0.17642569541931152, 'timestamp': '2025-10-01 04:23:11.745741', 'step': 8399, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:11.778562', 'step': 8399, 'epoch': 2} {'type': 'loss', 'content': 0.1625092625617981, 'timestamp': '2025-10-01 04:23:11.801962', 'step': 8400, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:11.840911', 'step': 8400, 'epoch': 2} {'type': 'loss', 'content': 0.11349545419216156, 'timestamp': '2025-10-01 04:23:11.842857', 'step': 8401, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:11.875427', 'step': 8401, 'epoch': 2} {'type': 'loss', 'content': 0.11230363696813583, 'timestamp': '2025-10-01 04:23:11.877501', 'step': 8402, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:11.912401', 'step': 8402, 'epoch': 2} {'type': 'loss', 'content': 0.14111071825027466, 'timestamp': '2025-10-01 04:23:11.914501', 'step': 8403, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:11.997816', 'step': 8403, 'epoch': 2} {'type': 'loss', 'content': 0.0721358209848404, 'timestamp': '2025-10-01 04:23:12.021604', 'step': 8404, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:12.054694', 'step': 8404, 'epoch': 2} {'type': 'loss', 'content': 0.1108306348323822, 'timestamp': '2025-10-01 04:23:12.056644', 'step': 8405, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:12.108778', 'step': 8405, 'epoch': 2} {'type': 'loss', 'content': 0.07499556988477707, 'timestamp': '2025-10-01 04:23:12.110967', 'step': 8406, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:12.147276', 'step': 8406, 'epoch': 2} {'type': 'loss', 'content': 0.15507882833480835, 'timestamp': '2025-10-01 04:23:12.149327', 'step': 8407, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:12.195205', 'step': 8407, 'epoch': 2} {'type': 'loss', 'content': 0.1495574712753296, 'timestamp': '2025-10-01 04:23:12.219515', 'step': 8408, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:23:12.256394', 'step': 8408, 'epoch': 2} {'type': 'loss', 'content': 0.13979946076869965, 'timestamp': '2025-10-01 04:23:12.263482', 'step': 8409, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:12.306689', 'step': 8409, 'epoch': 2} {'type': 'loss', 'content': 0.11274120956659317, 'timestamp': '2025-10-01 04:23:12.308697', 'step': 8410, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:23:12.351268', 'step': 8410, 'epoch': 2} {'type': 'loss', 'content': 0.13025614619255066, 'timestamp': '2025-10-01 04:23:12.354369', 'step': 8411, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:23:12.395298', 'step': 8411, 'epoch': 2} {'type': 'loss', 'content': 0.11947333812713623, 'timestamp': '2025-10-01 04:23:12.419169', 'step': 8412, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:12.473688', 'step': 8412, 'epoch': 2} {'type': 'loss', 'content': 0.12209879606962204, 'timestamp': '2025-10-01 04:23:12.475877', 'step': 8413, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:23:12.509693', 'step': 8413, 'epoch': 2} {'type': 'loss', 'content': 0.04499366879463196, 'timestamp': '2025-10-01 04:23:12.511673', 'step': 8414, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:12.543334', 'step': 8414, 'epoch': 2} {'type': 'loss', 'content': 0.13764894008636475, 'timestamp': '2025-10-01 04:23:12.545632', 'step': 8415, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:12.576094', 'step': 8415, 'epoch': 2} {'type': 'loss', 'content': 0.10821569710969925, 'timestamp': '2025-10-01 04:23:12.600400', 'step': 8416, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:12.631542', 'step': 8416, 'epoch': 2} {'type': 'loss', 'content': 0.15317128598690033, 'timestamp': '2025-10-01 04:23:12.633808', 'step': 8417, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:23:12.663872', 'step': 8417, 'epoch': 2} {'type': 'loss', 'content': 0.15161234140396118, 'timestamp': '2025-10-01 04:23:12.666177', 'step': 8418, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:12.698547', 'step': 8418, 'epoch': 2} {'type': 'loss', 'content': 0.18574057519435883, 'timestamp': '2025-10-01 04:23:12.701194', 'step': 8419, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:12.732403', 'step': 8419, 'epoch': 2} {'type': 'loss', 'content': 0.15546002984046936, 'timestamp': '2025-10-01 04:23:12.755960', 'step': 8420, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:12.790530', 'step': 8420, 'epoch': 2} {'type': 'loss', 'content': 0.08959855139255524, 'timestamp': '2025-10-01 04:23:12.792935', 'step': 8421, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:12.838092', 'step': 8421, 'epoch': 2} {'type': 'loss', 'content': 0.08695825934410095, 'timestamp': '2025-10-01 04:23:12.845350', 'step': 8422, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:12.883059', 'step': 8422, 'epoch': 2} {'type': 'loss', 'content': 0.16662263870239258, 'timestamp': '2025-10-01 04:23:12.885245', 'step': 8423, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:12.924528', 'step': 8423, 'epoch': 2} {'type': 'loss', 'content': 0.09835811704397202, 'timestamp': '2025-10-01 04:23:12.948287', 'step': 8424, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:23:12.981233', 'step': 8424, 'epoch': 2} {'type': 'loss', 'content': 0.14455722272396088, 'timestamp': '2025-10-01 04:23:12.983299', 'step': 8425, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:13.028631', 'step': 8425, 'epoch': 2} {'type': 'loss', 'content': 0.07866175472736359, 'timestamp': '2025-10-01 04:23:13.030778', 'step': 8426, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:23:13.063966', 'step': 8426, 'epoch': 2} {'type': 'loss', 'content': 0.09018928557634354, 'timestamp': '2025-10-01 04:23:13.066762', 'step': 8427, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:23:13.096584', 'step': 8427, 'epoch': 2} {'type': 'loss', 'content': 0.12040988355875015, 'timestamp': '2025-10-01 04:23:13.120808', 'step': 8428, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:13.156414', 'step': 8428, 'epoch': 2} {'type': 'loss', 'content': 0.08657865226268768, 'timestamp': '2025-10-01 04:23:13.158569', 'step': 8429, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:23:13.188812', 'step': 8429, 'epoch': 2} {'type': 'loss', 'content': 0.14691874384880066, 'timestamp': '2025-10-01 04:23:13.191088', 'step': 8430, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:13.221331', 'step': 8430, 'epoch': 2} {'type': 'loss', 'content': 0.11862387508153915, 'timestamp': '2025-10-01 04:23:13.223239', 'step': 8431, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:23:13.253912', 'step': 8431, 'epoch': 2} {'type': 'loss', 'content': 0.20153765380382538, 'timestamp': '2025-10-01 04:23:13.277387', 'step': 8432, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:13.307872', 'step': 8432, 'epoch': 2} {'type': 'loss', 'content': 0.0772872194647789, 'timestamp': '2025-10-01 04:23:13.309862', 'step': 8433, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:13.342349', 'step': 8433, 'epoch': 2} {'type': 'loss', 'content': 0.07093861699104309, 'timestamp': '2025-10-01 04:23:13.344393', 'step': 8434, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:23:13.382218', 'step': 8434, 'epoch': 2} {'type': 'loss', 'content': 0.1517626792192459, 'timestamp': '2025-10-01 04:23:13.385182', 'step': 8435, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:23:13.418099', 'step': 8435, 'epoch': 2} {'type': 'loss', 'content': 0.15533216297626495, 'timestamp': '2025-10-01 04:23:13.441624', 'step': 8436, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:23:13.471699', 'step': 8436, 'epoch': 2} {'type': 'loss', 'content': 0.1217166930437088, 'timestamp': '2025-10-01 04:23:13.475202', 'step': 8437, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:13.513426', 'step': 8437, 'epoch': 2} {'type': 'loss', 'content': 0.15161703526973724, 'timestamp': '2025-10-01 04:23:13.515631', 'step': 8438, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:13.561803', 'step': 8438, 'epoch': 2} {'type': 'loss', 'content': 0.1511802226305008, 'timestamp': '2025-10-01 04:23:13.564069', 'step': 8439, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:13.599801', 'step': 8439, 'epoch': 2} {'type': 'loss', 'content': 0.12762726843357086, 'timestamp': '2025-10-01 04:23:13.623774', 'step': 8440, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:13.662299', 'step': 8440, 'epoch': 2} {'type': 'loss', 'content': 0.09628032147884369, 'timestamp': '2025-10-01 04:23:13.664314', 'step': 8441, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:13.700012', 'step': 8441, 'epoch': 2} {'type': 'loss', 'content': 0.1138564869761467, 'timestamp': '2025-10-01 04:23:13.702196', 'step': 8442, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:13.734732', 'step': 8442, 'epoch': 2} {'type': 'loss', 'content': 0.062458522617816925, 'timestamp': '2025-10-01 04:23:13.737645', 'step': 8443, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:13.773682', 'step': 8443, 'epoch': 2} {'type': 'loss', 'content': 0.13993686437606812, 'timestamp': '2025-10-01 04:23:13.797355', 'step': 8444, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:23:13.829443', 'step': 8444, 'epoch': 2} {'type': 'loss', 'content': 0.14227156341075897, 'timestamp': '2025-10-01 04:23:13.831602', 'step': 8445, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:13.863478', 'step': 8445, 'epoch': 2} {'type': 'loss', 'content': 0.06416817009449005, 'timestamp': '2025-10-01 04:23:13.865618', 'step': 8446, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:23:13.900379', 'step': 8446, 'epoch': 2} {'type': 'loss', 'content': 0.0806230902671814, 'timestamp': '2025-10-01 04:23:13.902844', 'step': 8447, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:13.933191', 'step': 8447, 'epoch': 2} {'type': 'loss', 'content': 0.10997592657804489, 'timestamp': '2025-10-01 04:23:13.956789', 'step': 8448, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:13.996833', 'step': 8448, 'epoch': 2} {'type': 'loss', 'content': 0.1549125760793686, 'timestamp': '2025-10-01 04:23:13.999318', 'step': 8449, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:14.033100', 'step': 8449, 'epoch': 2} {'type': 'loss', 'content': 0.10686031728982925, 'timestamp': '2025-10-01 04:23:14.035751', 'step': 8450, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:14.068883', 'step': 8450, 'epoch': 2} {'type': 'loss', 'content': 0.057981107383966446, 'timestamp': '2025-10-01 04:23:14.071040', 'step': 8451, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:14.103372', 'step': 8451, 'epoch': 2} {'type': 'loss', 'content': 0.15837234258651733, 'timestamp': '2025-10-01 04:23:14.127240', 'step': 8452, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:23:14.165071', 'step': 8452, 'epoch': 2} {'type': 'loss', 'content': 0.10271843522787094, 'timestamp': '2025-10-01 04:23:14.167516', 'step': 8453, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:23:14.214754', 'step': 8453, 'epoch': 2} {'type': 'loss', 'content': 0.16356459259986877, 'timestamp': '2025-10-01 04:23:14.217282', 'step': 8454, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:14.248505', 'step': 8454, 'epoch': 2} {'type': 'loss', 'content': 0.11351238936185837, 'timestamp': '2025-10-01 04:23:14.250523', 'step': 8455, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:23:14.293872', 'step': 8455, 'epoch': 2} {'type': 'loss', 'content': 0.13461844623088837, 'timestamp': '2025-10-01 04:23:14.317471', 'step': 8456, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:23:14.356023', 'step': 8456, 'epoch': 2} {'type': 'loss', 'content': 0.12354955822229385, 'timestamp': '2025-10-01 04:23:14.358220', 'step': 8457, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:23:14.391519', 'step': 8457, 'epoch': 2} {'type': 'loss', 'content': 0.14474622905254364, 'timestamp': '2025-10-01 04:23:14.393671', 'step': 8458, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:14.429558', 'step': 8458, 'epoch': 2} {'type': 'loss', 'content': 0.16183555126190186, 'timestamp': '2025-10-01 04:23:14.431645', 'step': 8459, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:14.463604', 'step': 8459, 'epoch': 2} {'type': 'loss', 'content': 0.09361271560192108, 'timestamp': '2025-10-01 04:23:14.487187', 'step': 8460, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:23:14.519397', 'step': 8460, 'epoch': 2} {'type': 'loss', 'content': 0.10297556966543198, 'timestamp': '2025-10-01 04:23:14.521400', 'step': 8461, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:23:14.552601', 'step': 8461, 'epoch': 2} {'type': 'loss', 'content': 0.15632687509059906, 'timestamp': '2025-10-01 04:23:14.557976', 'step': 8462, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:14.595322', 'step': 8462, 'epoch': 2} {'type': 'loss', 'content': 0.20178745687007904, 'timestamp': '2025-10-01 04:23:14.598992', 'step': 8463, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:23:14.631292', 'step': 8463, 'epoch': 2} {'type': 'loss', 'content': 0.06769059598445892, 'timestamp': '2025-10-01 04:23:14.656483', 'step': 8464, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:14.694242', 'step': 8464, 'epoch': 2} {'type': 'loss', 'content': 0.08123022317886353, 'timestamp': '2025-10-01 04:23:14.696216', 'step': 8465, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:14.729249', 'step': 8465, 'epoch': 2} {'type': 'loss', 'content': 0.09885057061910629, 'timestamp': '2025-10-01 04:23:14.731501', 'step': 8466, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:23:14.776442', 'step': 8466, 'epoch': 2} {'type': 'loss', 'content': 0.14430828392505646, 'timestamp': '2025-10-01 04:23:14.778738', 'step': 8467, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:14.816836', 'step': 8467, 'epoch': 2} {'type': 'loss', 'content': 0.08580692112445831, 'timestamp': '2025-10-01 04:23:14.840363', 'step': 8468, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:14.874101', 'step': 8468, 'epoch': 2} {'type': 'loss', 'content': 0.1373308002948761, 'timestamp': '2025-10-01 04:23:14.876275', 'step': 8469, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:23:14.916032', 'step': 8469, 'epoch': 2} {'type': 'loss', 'content': 0.06485152989625931, 'timestamp': '2025-10-01 04:23:14.918309', 'step': 8470, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:23:14.964874', 'step': 8470, 'epoch': 2} {'type': 'loss', 'content': 0.19808711111545563, 'timestamp': '2025-10-01 04:23:14.967333', 'step': 8471, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:15.001836', 'step': 8471, 'epoch': 2} {'type': 'loss', 'content': 0.13816572725772858, 'timestamp': '2025-10-01 04:23:15.025540', 'step': 8472, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:23:15.068146', 'step': 8472, 'epoch': 2} {'type': 'loss', 'content': 0.1641685962677002, 'timestamp': '2025-10-01 04:23:15.070118', 'step': 8473, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:15.104371', 'step': 8473, 'epoch': 2} {'type': 'loss', 'content': 0.08510977029800415, 'timestamp': '2025-10-01 04:23:15.106587', 'step': 8474, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:15.155863', 'step': 8474, 'epoch': 2} {'type': 'loss', 'content': 0.06463059782981873, 'timestamp': '2025-10-01 04:23:15.158446', 'step': 8475, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:23:15.194948', 'step': 8475, 'epoch': 2} {'type': 'loss', 'content': 0.1570568084716797, 'timestamp': '2025-10-01 04:23:15.218604', 'step': 8476, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:23:15.251311', 'step': 8476, 'epoch': 2} {'type': 'loss', 'content': 0.09184189885854721, 'timestamp': '2025-10-01 04:23:15.254759', 'step': 8477, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:15.287479', 'step': 8477, 'epoch': 2} {'type': 'loss', 'content': 0.09611387550830841, 'timestamp': '2025-10-01 04:23:15.290234', 'step': 8478, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:15.323464', 'step': 8478, 'epoch': 2} {'type': 'loss', 'content': 0.06475993990898132, 'timestamp': '2025-10-01 04:23:15.325452', 'step': 8479, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:23:15.364138', 'step': 8479, 'epoch': 2} {'type': 'loss', 'content': 0.1342337280511856, 'timestamp': '2025-10-01 04:23:15.387858', 'step': 8480, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:23:15.421147', 'step': 8480, 'epoch': 2} {'type': 'loss', 'content': 0.09540805965662003, 'timestamp': '2025-10-01 04:23:15.423395', 'step': 8481, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:15.467318', 'step': 8481, 'epoch': 2} {'type': 'loss', 'content': 0.22273559868335724, 'timestamp': '2025-10-01 04:23:15.469483', 'step': 8482, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:23:15.504076', 'step': 8482, 'epoch': 2} {'type': 'loss', 'content': 0.11806211620569229, 'timestamp': '2025-10-01 04:23:15.506212', 'step': 8483, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:15.557143', 'step': 8483, 'epoch': 2} {'type': 'loss', 'content': 0.08855874091386795, 'timestamp': '2025-10-01 04:23:15.580669', 'step': 8484, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:15.621204', 'step': 8484, 'epoch': 2} {'type': 'loss', 'content': 0.0911061242222786, 'timestamp': '2025-10-01 04:23:15.623552', 'step': 8485, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:15.668761', 'step': 8485, 'epoch': 2} {'type': 'loss', 'content': 0.14215190708637238, 'timestamp': '2025-10-01 04:23:15.671181', 'step': 8486, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:15.717172', 'step': 8486, 'epoch': 2} {'type': 'loss', 'content': 0.09213284403085709, 'timestamp': '2025-10-01 04:23:15.719278', 'step': 8487, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:15.757217', 'step': 8487, 'epoch': 2} {'type': 'loss', 'content': 0.18951316177845, 'timestamp': '2025-10-01 04:23:15.780794', 'step': 8488, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:15.825359', 'step': 8488, 'epoch': 2} {'type': 'loss', 'content': 0.191269651055336, 'timestamp': '2025-10-01 04:23:15.827795', 'step': 8489, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:23:15.868026', 'step': 8489, 'epoch': 2} {'type': 'loss', 'content': 0.06673040986061096, 'timestamp': '2025-10-01 04:23:15.870950', 'step': 8490, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:15.902460', 'step': 8490, 'epoch': 2} {'type': 'loss', 'content': 0.22659097611904144, 'timestamp': '2025-10-01 04:23:15.904578', 'step': 8491, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:15.943558', 'step': 8491, 'epoch': 2} {'type': 'loss', 'content': 0.12101329863071442, 'timestamp': '2025-10-01 04:23:15.967852', 'step': 8492, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:23:16.004552', 'step': 8492, 'epoch': 2} {'type': 'loss', 'content': 0.2216397076845169, 'timestamp': '2025-10-01 04:23:16.006675', 'step': 8493, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:16.052312', 'step': 8493, 'epoch': 2} {'type': 'loss', 'content': 0.09979096800088882, 'timestamp': '2025-10-01 04:23:16.054355', 'step': 8494, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:23:16.098427', 'step': 8494, 'epoch': 2} {'type': 'loss', 'content': 0.19203944504261017, 'timestamp': '2025-10-01 04:23:16.100761', 'step': 8495, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:16.139576', 'step': 8495, 'epoch': 2} {'type': 'loss', 'content': 0.12758050858974457, 'timestamp': '2025-10-01 04:23:16.163685', 'step': 8496, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:16.202691', 'step': 8496, 'epoch': 2} {'type': 'loss', 'content': 0.11984006315469742, 'timestamp': '2025-10-01 04:23:16.204857', 'step': 8497, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:23:16.246136', 'step': 8497, 'epoch': 2} {'type': 'loss', 'content': 0.08452551066875458, 'timestamp': '2025-10-01 04:23:16.249355', 'step': 8498, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:16.287186', 'step': 8498, 'epoch': 2} {'type': 'loss', 'content': 0.10328783094882965, 'timestamp': '2025-10-01 04:23:16.290500', 'step': 8499, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:23:16.324087', 'step': 8499, 'epoch': 2} {'type': 'loss', 'content': 0.13070382177829742, 'timestamp': '2025-10-01 04:23:16.349467', 'step': 8500, 'epoch': 2} {'type': 'info', 'content': 'Checkpoint saved at step 8500', 'timestamp': '2025-10-01 04:23:22.102141', 'step': 8500, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:22.145036', 'step': 8500, 'epoch': 2} {'type': 'loss', 'content': 0.20688016712665558, 'timestamp': '2025-10-01 04:23:22.147212', 'step': 8501, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:22.196337', 'step': 8501, 'epoch': 2} {'type': 'loss', 'content': 0.1885899305343628, 'timestamp': '2025-10-01 04:23:22.198392', 'step': 8502, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:23:22.231218', 'step': 8502, 'epoch': 2} {'type': 'loss', 'content': 0.1089758351445198, 'timestamp': '2025-10-01 04:23:22.236851', 'step': 8503, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:22.275166', 'step': 8503, 'epoch': 2} {'type': 'loss', 'content': 0.18690486252307892, 'timestamp': '2025-10-01 04:23:22.299015', 'step': 8504, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:23:22.332116', 'step': 8504, 'epoch': 2} {'type': 'loss', 'content': 0.173230841755867, 'timestamp': '2025-10-01 04:23:22.334120', 'step': 8505, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:22.366365', 'step': 8505, 'epoch': 2} {'type': 'loss', 'content': 0.042219292372465134, 'timestamp': '2025-10-01 04:23:22.368607', 'step': 8506, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:22.402028', 'step': 8506, 'epoch': 2} {'type': 'loss', 'content': 0.1694961041212082, 'timestamp': '2025-10-01 04:23:22.404234', 'step': 8507, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:23:22.437211', 'step': 8507, 'epoch': 2} {'type': 'loss', 'content': 0.119273841381073, 'timestamp': '2025-10-01 04:23:22.460907', 'step': 8508, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:22.494938', 'step': 8508, 'epoch': 2} {'type': 'loss', 'content': 0.09427912533283234, 'timestamp': '2025-10-01 04:23:22.497020', 'step': 8509, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:23:22.535614', 'step': 8509, 'epoch': 2} {'type': 'loss', 'content': 0.07288464158773422, 'timestamp': '2025-10-01 04:23:22.538433', 'step': 8510, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:22.579012', 'step': 8510, 'epoch': 2} {'type': 'loss', 'content': 0.08966077119112015, 'timestamp': '2025-10-01 04:23:22.581290', 'step': 8511, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:23:22.615406', 'step': 8511, 'epoch': 2} {'type': 'loss', 'content': 0.10387004911899567, 'timestamp': '2025-10-01 04:23:22.638855', 'step': 8512, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:22.675041', 'step': 8512, 'epoch': 2} {'type': 'loss', 'content': 0.08718261122703552, 'timestamp': '2025-10-01 04:23:22.676950', 'step': 8513, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:23:22.728699', 'step': 8513, 'epoch': 2} {'type': 'loss', 'content': 0.09405098855495453, 'timestamp': '2025-10-01 04:23:22.731293', 'step': 8514, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:22.770390', 'step': 8514, 'epoch': 2} {'type': 'loss', 'content': 0.08601784706115723, 'timestamp': '2025-10-01 04:23:22.772328', 'step': 8515, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:23:22.804930', 'step': 8515, 'epoch': 2} {'type': 'loss', 'content': 0.178798645734787, 'timestamp': '2025-10-01 04:23:22.830226', 'step': 8516, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:22.862681', 'step': 8516, 'epoch': 2} {'type': 'loss', 'content': 0.22248463332653046, 'timestamp': '2025-10-01 04:23:22.864637', 'step': 8517, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:22.913897', 'step': 8517, 'epoch': 2} {'type': 'loss', 'content': 0.05211303383111954, 'timestamp': '2025-10-01 04:23:22.916609', 'step': 8518, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:22.954188', 'step': 8518, 'epoch': 2} {'type': 'loss', 'content': 0.10158450156450272, 'timestamp': '2025-10-01 04:23:22.957383', 'step': 8519, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:23:22.995568', 'step': 8519, 'epoch': 2} {'type': 'loss', 'content': 0.1711406111717224, 'timestamp': '2025-10-01 04:23:23.019093', 'step': 8520, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:23.053059', 'step': 8520, 'epoch': 2} {'type': 'loss', 'content': 0.28154051303863525, 'timestamp': '2025-10-01 04:23:23.056217', 'step': 8521, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:23.101972', 'step': 8521, 'epoch': 2} {'type': 'loss', 'content': 0.07848979532718658, 'timestamp': '2025-10-01 04:23:23.104108', 'step': 8522, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:23.136997', 'step': 8522, 'epoch': 2} {'type': 'loss', 'content': 0.12156639248132706, 'timestamp': '2025-10-01 04:23:23.139046', 'step': 8523, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:23.178588', 'step': 8523, 'epoch': 2} {'type': 'loss', 'content': 0.12997716665267944, 'timestamp': '2025-10-01 04:23:23.202413', 'step': 8524, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:23.264332', 'step': 8524, 'epoch': 2} {'type': 'loss', 'content': 0.1441076248884201, 'timestamp': '2025-10-01 04:23:23.267346', 'step': 8525, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:23.300503', 'step': 8525, 'epoch': 2} {'type': 'loss', 'content': 0.15498173236846924, 'timestamp': '2025-10-01 04:23:23.302530', 'step': 8526, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:23:23.333281', 'step': 8526, 'epoch': 2} {'type': 'loss', 'content': 0.10037605464458466, 'timestamp': '2025-10-01 04:23:23.335963', 'step': 8527, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:23.368854', 'step': 8527, 'epoch': 2} {'type': 'loss', 'content': 0.15065687894821167, 'timestamp': '2025-10-01 04:23:23.392449', 'step': 8528, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:23.434691', 'step': 8528, 'epoch': 2} {'type': 'loss', 'content': 0.12948305904865265, 'timestamp': '2025-10-01 04:23:23.436630', 'step': 8529, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:23.467416', 'step': 8529, 'epoch': 2} {'type': 'loss', 'content': 0.13774727284908295, 'timestamp': '2025-10-01 04:23:23.469497', 'step': 8530, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:23.504705', 'step': 8530, 'epoch': 2} {'type': 'loss', 'content': 0.11533667892217636, 'timestamp': '2025-10-01 04:23:23.506858', 'step': 8531, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:23.538022', 'step': 8531, 'epoch': 2} {'type': 'loss', 'content': 0.133070707321167, 'timestamp': '2025-10-01 04:23:23.564428', 'step': 8532, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:23.606462', 'step': 8532, 'epoch': 2} {'type': 'loss', 'content': 0.12815238535404205, 'timestamp': '2025-10-01 04:23:23.608494', 'step': 8533, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:23.648864', 'step': 8533, 'epoch': 2} {'type': 'loss', 'content': 0.15128937363624573, 'timestamp': '2025-10-01 04:23:23.650960', 'step': 8534, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:23:23.682734', 'step': 8534, 'epoch': 2} {'type': 'loss', 'content': 0.11253675073385239, 'timestamp': '2025-10-01 04:23:23.684917', 'step': 8535, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:23.716331', 'step': 8535, 'epoch': 2} {'type': 'loss', 'content': 0.09284631162881851, 'timestamp': '2025-10-01 04:23:23.739900', 'step': 8536, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:23.772888', 'step': 8536, 'epoch': 2} {'type': 'loss', 'content': 0.17212779819965363, 'timestamp': '2025-10-01 04:23:23.774935', 'step': 8537, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:23.806844', 'step': 8537, 'epoch': 2} {'type': 'loss', 'content': 0.09465707093477249, 'timestamp': '2025-10-01 04:23:23.808919', 'step': 8538, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:23:23.841031', 'step': 8538, 'epoch': 2} {'type': 'loss', 'content': 0.04497627541422844, 'timestamp': '2025-10-01 04:23:23.843108', 'step': 8539, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:23:23.875804', 'step': 8539, 'epoch': 2} {'type': 'loss', 'content': 0.22053006291389465, 'timestamp': '2025-10-01 04:23:23.899513', 'step': 8540, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:23:23.936861', 'step': 8540, 'epoch': 2} {'type': 'loss', 'content': 0.0835416316986084, 'timestamp': '2025-10-01 04:23:23.938663', 'step': 8541, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:23:23.973803', 'step': 8541, 'epoch': 2} {'type': 'loss', 'content': 0.03689885884523392, 'timestamp': '2025-10-01 04:23:23.975684', 'step': 8542, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:24.015208', 'step': 8542, 'epoch': 2} {'type': 'loss', 'content': 0.18907414376735687, 'timestamp': '2025-10-01 04:23:24.017335', 'step': 8543, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:23:24.049481', 'step': 8543, 'epoch': 2} {'type': 'loss', 'content': 0.0637366846203804, 'timestamp': '2025-10-01 04:23:24.073101', 'step': 8544, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:24.104373', 'step': 8544, 'epoch': 2} {'type': 'loss', 'content': 0.1281014382839203, 'timestamp': '2025-10-01 04:23:24.111313', 'step': 8545, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:23:24.143067', 'step': 8545, 'epoch': 2} {'type': 'loss', 'content': 0.08167092502117157, 'timestamp': '2025-10-01 04:23:24.145544', 'step': 8546, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:23:24.178982', 'step': 8546, 'epoch': 2} {'type': 'loss', 'content': 0.12226984649896622, 'timestamp': '2025-10-01 04:23:24.180870', 'step': 8547, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:24.212118', 'step': 8547, 'epoch': 2} {'type': 'loss', 'content': 0.09964120388031006, 'timestamp': '2025-10-01 04:23:24.235573', 'step': 8548, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:23:24.267872', 'step': 8548, 'epoch': 2} {'type': 'loss', 'content': 0.11218088865280151, 'timestamp': '2025-10-01 04:23:24.269543', 'step': 8549, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:24.300380', 'step': 8549, 'epoch': 2} {'type': 'loss', 'content': 0.11532223969697952, 'timestamp': '2025-10-01 04:23:24.303106', 'step': 8550, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:24.336686', 'step': 8550, 'epoch': 2} {'type': 'loss', 'content': 0.07340193539857864, 'timestamp': '2025-10-01 04:23:24.338829', 'step': 8551, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:23:24.370323', 'step': 8551, 'epoch': 2} {'type': 'loss', 'content': 0.09627363085746765, 'timestamp': '2025-10-01 04:23:24.394112', 'step': 8552, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:24.428985', 'step': 8552, 'epoch': 2} {'type': 'loss', 'content': 0.13936972618103027, 'timestamp': '2025-10-01 04:23:24.431078', 'step': 8553, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:24.464800', 'step': 8553, 'epoch': 2} {'type': 'loss', 'content': 0.11660631746053696, 'timestamp': '2025-10-01 04:23:24.467040', 'step': 8554, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:24.498853', 'step': 8554, 'epoch': 2} {'type': 'loss', 'content': 0.16596318781375885, 'timestamp': '2025-10-01 04:23:24.501059', 'step': 8555, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:23:24.532204', 'step': 8555, 'epoch': 2} {'type': 'loss', 'content': 0.09544897079467773, 'timestamp': '2025-10-01 04:23:24.555756', 'step': 8556, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:23:24.590087', 'step': 8556, 'epoch': 2} {'type': 'loss', 'content': 0.12499015778303146, 'timestamp': '2025-10-01 04:23:24.592155', 'step': 8557, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:23:24.625922', 'step': 8557, 'epoch': 2} {'type': 'loss', 'content': 0.1566983312368393, 'timestamp': '2025-10-01 04:23:24.628356', 'step': 8558, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:23:24.660888', 'step': 8558, 'epoch': 2} {'type': 'loss', 'content': 0.12284611165523529, 'timestamp': '2025-10-01 04:23:24.662956', 'step': 8559, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:24.701170', 'step': 8559, 'epoch': 2} {'type': 'loss', 'content': 0.10247768461704254, 'timestamp': '2025-10-01 04:23:24.724701', 'step': 8560, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:23:24.755918', 'step': 8560, 'epoch': 2} {'type': 'loss', 'content': 0.0709741860628128, 'timestamp': '2025-10-01 04:23:24.757915', 'step': 8561, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:23:24.790553', 'step': 8561, 'epoch': 2} {'type': 'loss', 'content': 0.09110604971647263, 'timestamp': '2025-10-01 04:23:24.792744', 'step': 8562, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:23:24.824578', 'step': 8562, 'epoch': 2} {'type': 'loss', 'content': 0.15636274218559265, 'timestamp': '2025-10-01 04:23:24.827108', 'step': 8563, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:24.859724', 'step': 8563, 'epoch': 2} {'type': 'loss', 'content': 0.09900955855846405, 'timestamp': '2025-10-01 04:23:24.883446', 'step': 8564, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:23:24.916621', 'step': 8564, 'epoch': 2} {'type': 'loss', 'content': 0.1071828082203865, 'timestamp': '2025-10-01 04:23:24.918511', 'step': 8565, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:23:24.950602', 'step': 8565, 'epoch': 2} {'type': 'loss', 'content': 0.12020457535982132, 'timestamp': '2025-10-01 04:23:24.952620', 'step': 8566, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:24.991746', 'step': 8566, 'epoch': 2} {'type': 'loss', 'content': 0.13760297000408173, 'timestamp': '2025-10-01 04:23:24.993905', 'step': 8567, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:23:25.027809', 'step': 8567, 'epoch': 2} {'type': 'loss', 'content': 0.165839284658432, 'timestamp': '2025-10-01 04:23:25.051866', 'step': 8568, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:23:25.087371', 'step': 8568, 'epoch': 2} {'type': 'loss', 'content': 0.09338284283876419, 'timestamp': '2025-10-01 04:23:25.089753', 'step': 8569, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:25.122807', 'step': 8569, 'epoch': 2} {'type': 'loss', 'content': 0.12113688141107559, 'timestamp': '2025-10-01 04:23:25.124958', 'step': 8570, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:23:25.179286', 'step': 8570, 'epoch': 2} {'type': 'loss', 'content': 0.11473695933818817, 'timestamp': '2025-10-01 04:23:25.182083', 'step': 8571, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:23:25.213526', 'step': 8571, 'epoch': 2} {'type': 'loss', 'content': 0.1189250648021698, 'timestamp': '2025-10-01 04:23:25.236988', 'step': 8572, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:25.270478', 'step': 8572, 'epoch': 2} {'type': 'loss', 'content': 0.1568986028432846, 'timestamp': '2025-10-01 04:23:25.272502', 'step': 8573, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:25.304808', 'step': 8573, 'epoch': 2} {'type': 'loss', 'content': 0.13044656813144684, 'timestamp': '2025-10-01 04:23:25.306794', 'step': 8574, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:23:25.352981', 'step': 8574, 'epoch': 2} {'type': 'loss', 'content': 0.07255415618419647, 'timestamp': '2025-10-01 04:23:25.354866', 'step': 8575, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:23:25.396164', 'step': 8575, 'epoch': 2} {'type': 'loss', 'content': 0.08952470123767853, 'timestamp': '2025-10-01 04:23:25.419661', 'step': 8576, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:23:25.451577', 'step': 8576, 'epoch': 2} {'type': 'loss', 'content': 0.08873174339532852, 'timestamp': '2025-10-01 04:23:25.453586', 'step': 8577, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:25.488156', 'step': 8577, 'epoch': 2} {'type': 'loss', 'content': 0.19138231873512268, 'timestamp': '2025-10-01 04:23:25.490460', 'step': 8578, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:23:25.522230', 'step': 8578, 'epoch': 2} {'type': 'loss', 'content': 0.18316513299942017, 'timestamp': '2025-10-01 04:23:25.525741', 'step': 8579, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:25.557726', 'step': 8579, 'epoch': 2} {'type': 'loss', 'content': 0.08468072861433029, 'timestamp': '2025-10-01 04:23:25.581512', 'step': 8580, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:25.616938', 'step': 8580, 'epoch': 2} {'type': 'loss', 'content': 0.20113958418369293, 'timestamp': '2025-10-01 04:23:25.621069', 'step': 8581, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:23:25.659993', 'step': 8581, 'epoch': 2} {'type': 'loss', 'content': 0.14606335759162903, 'timestamp': '2025-10-01 04:23:25.662685', 'step': 8582, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:23:25.702751', 'step': 8582, 'epoch': 2} {'type': 'loss', 'content': 0.10978084057569504, 'timestamp': '2025-10-01 04:23:25.704982', 'step': 8583, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:25.746924', 'step': 8583, 'epoch': 2} {'type': 'loss', 'content': 0.17383459210395813, 'timestamp': '2025-10-01 04:23:25.770566', 'step': 8584, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:23:25.809146', 'step': 8584, 'epoch': 2} {'type': 'loss', 'content': 0.1004667654633522, 'timestamp': '2025-10-01 04:23:25.811087', 'step': 8585, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:25.844441', 'step': 8585, 'epoch': 2} {'type': 'loss', 'content': 0.1332559436559677, 'timestamp': '2025-10-01 04:23:25.846357', 'step': 8586, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:23:25.888120', 'step': 8586, 'epoch': 2} {'type': 'loss', 'content': 0.11539208143949509, 'timestamp': '2025-10-01 04:23:25.890888', 'step': 8587, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:25.932663', 'step': 8587, 'epoch': 2} {'type': 'loss', 'content': 0.13486842811107635, 'timestamp': '2025-10-01 04:23:25.956096', 'step': 8588, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:25.998028', 'step': 8588, 'epoch': 2} {'type': 'loss', 'content': 0.11080631613731384, 'timestamp': '2025-10-01 04:23:25.999910', 'step': 8589, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:23:26.047308', 'step': 8589, 'epoch': 2} {'type': 'loss', 'content': 0.13976791501045227, 'timestamp': '2025-10-01 04:23:26.049269', 'step': 8590, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:23:26.096713', 'step': 8590, 'epoch': 2} {'type': 'loss', 'content': 0.1880892813205719, 'timestamp': '2025-10-01 04:23:26.098845', 'step': 8591, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:26.133216', 'step': 8591, 'epoch': 2} {'type': 'loss', 'content': 0.1118977889418602, 'timestamp': '2025-10-01 04:23:26.157154', 'step': 8592, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:26.201506', 'step': 8592, 'epoch': 2} {'type': 'loss', 'content': 0.18150199949741364, 'timestamp': '2025-10-01 04:23:26.203640', 'step': 8593, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:26.251344', 'step': 8593, 'epoch': 2} {'type': 'loss', 'content': 0.0824914425611496, 'timestamp': '2025-10-01 04:23:26.259893', 'step': 8594, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:23:26.309092', 'step': 8594, 'epoch': 2} {'type': 'loss', 'content': 0.12059368938207626, 'timestamp': '2025-10-01 04:23:26.314372', 'step': 8595, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:26.362819', 'step': 8595, 'epoch': 2} {'type': 'loss', 'content': 0.2130325883626938, 'timestamp': '2025-10-01 04:23:26.386325', 'step': 8596, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:26.428984', 'step': 8596, 'epoch': 2} {'type': 'loss', 'content': 0.08205489069223404, 'timestamp': '2025-10-01 04:23:26.432496', 'step': 8597, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:26.467697', 'step': 8597, 'epoch': 2} {'type': 'loss', 'content': 0.13239198923110962, 'timestamp': '2025-10-01 04:23:26.471719', 'step': 8598, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:23:26.509032', 'step': 8598, 'epoch': 2} {'type': 'loss', 'content': 0.12551313638687134, 'timestamp': '2025-10-01 04:23:26.511010', 'step': 8599, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:26.547420', 'step': 8599, 'epoch': 2} {'type': 'loss', 'content': 0.04431552812457085, 'timestamp': '2025-10-01 04:23:26.571020', 'step': 8600, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:26.615459', 'step': 8600, 'epoch': 2} {'type': 'loss', 'content': 0.09247776120901108, 'timestamp': '2025-10-01 04:23:26.617532', 'step': 8601, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:23:26.664751', 'step': 8601, 'epoch': 2} {'type': 'loss', 'content': 0.10364732146263123, 'timestamp': '2025-10-01 04:23:26.668056', 'step': 8602, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:23:26.715054', 'step': 8602, 'epoch': 2} {'type': 'loss', 'content': 0.10395591706037521, 'timestamp': '2025-10-01 04:23:26.716812', 'step': 8603, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:23:26.760322', 'step': 8603, 'epoch': 2} {'type': 'loss', 'content': 0.1608973890542984, 'timestamp': '2025-10-01 04:23:26.783965', 'step': 8604, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:23:26.830143', 'step': 8604, 'epoch': 2} {'type': 'loss', 'content': 0.1392935961484909, 'timestamp': '2025-10-01 04:23:26.832263', 'step': 8605, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:23:26.874849', 'step': 8605, 'epoch': 2} {'type': 'loss', 'content': 0.08475605398416519, 'timestamp': '2025-10-01 04:23:26.876905', 'step': 8606, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:23:26.908176', 'step': 8606, 'epoch': 2} {'type': 'loss', 'content': 0.10062438994646072, 'timestamp': '2025-10-01 04:23:26.910256', 'step': 8607, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:26.947864', 'step': 8607, 'epoch': 2} {'type': 'loss', 'content': 0.10209096968173981, 'timestamp': '2025-10-01 04:23:26.971346', 'step': 8608, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:27.004228', 'step': 8608, 'epoch': 2} {'type': 'loss', 'content': 0.19250322878360748, 'timestamp': '2025-10-01 04:23:27.006300', 'step': 8609, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:23:27.040219', 'step': 8609, 'epoch': 2} {'type': 'loss', 'content': 0.09508728981018066, 'timestamp': '2025-10-01 04:23:27.042493', 'step': 8610, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:27.075600', 'step': 8610, 'epoch': 2} {'type': 'loss', 'content': 0.08738445490598679, 'timestamp': '2025-10-01 04:23:27.078516', 'step': 8611, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:27.114925', 'step': 8611, 'epoch': 2} {'type': 'loss', 'content': 0.09399916976690292, 'timestamp': '2025-10-01 04:23:27.139040', 'step': 8612, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:23:27.175447', 'step': 8612, 'epoch': 2} {'type': 'loss', 'content': 0.10539952665567398, 'timestamp': '2025-10-01 04:23:27.178356', 'step': 8613, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:23:27.226529', 'step': 8613, 'epoch': 2} {'type': 'loss', 'content': 0.10763958096504211, 'timestamp': '2025-10-01 04:23:27.228621', 'step': 8614, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:23:27.278074', 'step': 8614, 'epoch': 2} {'type': 'loss', 'content': 0.10178821533918381, 'timestamp': '2025-10-01 04:23:27.282526', 'step': 8615, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-10-01 04:23:27.314761', 'step': 8615, 'epoch': 2} {'type': 'loss', 'content': 0.182964488863945, 'timestamp': '2025-10-01 04:23:27.342979', 'step': 8616, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:23:27.386279', 'step': 8616, 'epoch': 2} {'type': 'loss', 'content': 0.1495712399482727, 'timestamp': '2025-10-01 04:23:27.390382', 'step': 8617, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:27.424771', 'step': 8617, 'epoch': 2} {'type': 'loss', 'content': 0.15400630235671997, 'timestamp': '2025-10-01 04:23:27.427278', 'step': 8618, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:27.459706', 'step': 8618, 'epoch': 2} {'type': 'loss', 'content': 0.06988713145256042, 'timestamp': '2025-10-01 04:23:27.462966', 'step': 8619, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:27.496281', 'step': 8619, 'epoch': 2} {'type': 'loss', 'content': 0.12439913302659988, 'timestamp': '2025-10-01 04:23:27.523969', 'step': 8620, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:23:27.559856', 'step': 8620, 'epoch': 2} {'type': 'loss', 'content': 0.11644956469535828, 'timestamp': '2025-10-01 04:23:27.562456', 'step': 8621, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:23:27.601141', 'step': 8621, 'epoch': 2} {'type': 'loss', 'content': 0.1572171300649643, 'timestamp': '2025-10-01 04:23:27.603827', 'step': 8622, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:27.636209', 'step': 8622, 'epoch': 2} {'type': 'loss', 'content': 0.07643933594226837, 'timestamp': '2025-10-01 04:23:27.638803', 'step': 8623, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:23:27.682120', 'step': 8623, 'epoch': 2} {'type': 'loss', 'content': 0.06721654534339905, 'timestamp': '2025-10-01 04:23:27.709902', 'step': 8624, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:23:27.745328', 'step': 8624, 'epoch': 2} {'type': 'loss', 'content': 0.11414774507284164, 'timestamp': '2025-10-01 04:23:27.747426', 'step': 8625, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:27.779394', 'step': 8625, 'epoch': 2} {'type': 'loss', 'content': 0.10090132057666779, 'timestamp': '2025-10-01 04:23:27.781761', 'step': 8626, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:27.816324', 'step': 8626, 'epoch': 2} {'type': 'loss', 'content': 0.1591089814901352, 'timestamp': '2025-10-01 04:23:27.818652', 'step': 8627, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:23:27.852972', 'step': 8627, 'epoch': 2} {'type': 'loss', 'content': 0.05833752825856209, 'timestamp': '2025-10-01 04:23:27.876802', 'step': 8628, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:27.930993', 'step': 8628, 'epoch': 2} {'type': 'loss', 'content': 0.07981684803962708, 'timestamp': '2025-10-01 04:23:27.933309', 'step': 8629, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:27.982625', 'step': 8629, 'epoch': 2} {'type': 'loss', 'content': 0.1472339630126953, 'timestamp': '2025-10-01 04:23:27.985037', 'step': 8630, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:28.017842', 'step': 8630, 'epoch': 2} {'type': 'loss', 'content': 0.11802390217781067, 'timestamp': '2025-10-01 04:23:28.020265', 'step': 8631, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:28.053031', 'step': 8631, 'epoch': 2} {'type': 'loss', 'content': 0.13941042125225067, 'timestamp': '2025-10-01 04:23:28.076980', 'step': 8632, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:28.109626', 'step': 8632, 'epoch': 2} {'type': 'loss', 'content': 0.1032124012708664, 'timestamp': '2025-10-01 04:23:28.115572', 'step': 8633, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:23:28.147150', 'step': 8633, 'epoch': 2} {'type': 'loss', 'content': 0.1390853226184845, 'timestamp': '2025-10-01 04:23:28.149550', 'step': 8634, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:28.187600', 'step': 8634, 'epoch': 2} {'type': 'loss', 'content': 0.09630177915096283, 'timestamp': '2025-10-01 04:23:28.190070', 'step': 8635, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:23:28.225829', 'step': 8635, 'epoch': 2} {'type': 'loss', 'content': 0.16331370174884796, 'timestamp': '2025-10-01 04:23:28.250462', 'step': 8636, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:23:28.290854', 'step': 8636, 'epoch': 2} {'type': 'loss', 'content': 0.11101167649030685, 'timestamp': '2025-10-01 04:23:28.293338', 'step': 8637, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:23:28.327601', 'step': 8637, 'epoch': 2} {'type': 'loss', 'content': 0.1431831568479538, 'timestamp': '2025-10-01 04:23:28.330054', 'step': 8638, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:28.363922', 'step': 8638, 'epoch': 2} {'type': 'loss', 'content': 0.11408168822526932, 'timestamp': '2025-10-01 04:23:28.365937', 'step': 8639, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:28.403961', 'step': 8639, 'epoch': 2} {'type': 'loss', 'content': 0.16513100266456604, 'timestamp': '2025-10-01 04:23:28.427876', 'step': 8640, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:28.460852', 'step': 8640, 'epoch': 2} {'type': 'loss', 'content': 0.11929843574762344, 'timestamp': '2025-10-01 04:23:28.463482', 'step': 8641, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:28.503369', 'step': 8641, 'epoch': 2} {'type': 'loss', 'content': 0.11927801370620728, 'timestamp': '2025-10-01 04:23:28.505588', 'step': 8642, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:28.537698', 'step': 8642, 'epoch': 2} {'type': 'loss', 'content': 0.1529398113489151, 'timestamp': '2025-10-01 04:23:28.539857', 'step': 8643, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:23:28.588893', 'step': 8643, 'epoch': 2} {'type': 'loss', 'content': 0.0717882290482521, 'timestamp': '2025-10-01 04:23:28.612443', 'step': 8644, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:23:28.651608', 'step': 8644, 'epoch': 2} {'type': 'loss', 'content': 0.03931964933872223, 'timestamp': '2025-10-01 04:23:28.653588', 'step': 8645, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:28.691060', 'step': 8645, 'epoch': 2} {'type': 'loss', 'content': 0.1328708976507187, 'timestamp': '2025-10-01 04:23:28.692937', 'step': 8646, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:23:28.729206', 'step': 8646, 'epoch': 2} {'type': 'loss', 'content': 0.17040066421031952, 'timestamp': '2025-10-01 04:23:28.741736', 'step': 8647, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:28.773284', 'step': 8647, 'epoch': 2} {'type': 'loss', 'content': 0.16019544005393982, 'timestamp': '2025-10-01 04:23:28.797571', 'step': 8648, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:28.829458', 'step': 8648, 'epoch': 2} {'type': 'loss', 'content': 0.0818566232919693, 'timestamp': '2025-10-01 04:23:28.831614', 'step': 8649, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:23:28.862354', 'step': 8649, 'epoch': 2} {'type': 'loss', 'content': 0.09536894410848618, 'timestamp': '2025-10-01 04:23:28.864758', 'step': 8650, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:23:28.900966', 'step': 8650, 'epoch': 2} {'type': 'loss', 'content': 0.1728105992078781, 'timestamp': '2025-10-01 04:23:28.902755', 'step': 8651, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:28.934551', 'step': 8651, 'epoch': 2} {'type': 'loss', 'content': 0.058009423315525055, 'timestamp': '2025-10-01 04:23:28.958347', 'step': 8652, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:28.995306', 'step': 8652, 'epoch': 2} {'type': 'loss', 'content': 0.11553331464529037, 'timestamp': '2025-10-01 04:23:28.997236', 'step': 8653, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:29.027978', 'step': 8653, 'epoch': 2} {'type': 'loss', 'content': 0.1439805030822754, 'timestamp': '2025-10-01 04:23:29.030070', 'step': 8654, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:29.073911', 'step': 8654, 'epoch': 2} {'type': 'loss', 'content': 0.12240584194660187, 'timestamp': '2025-10-01 04:23:29.076529', 'step': 8655, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:29.107733', 'step': 8655, 'epoch': 2} {'type': 'loss', 'content': 0.12066873908042908, 'timestamp': '2025-10-01 04:23:29.131250', 'step': 8656, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:29.175527', 'step': 8656, 'epoch': 2} {'type': 'loss', 'content': 0.11761800944805145, 'timestamp': '2025-10-01 04:23:29.177294', 'step': 8657, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:29.215314', 'step': 8657, 'epoch': 2} {'type': 'loss', 'content': 0.12327582389116287, 'timestamp': '2025-10-01 04:23:29.217416', 'step': 8658, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:29.257504', 'step': 8658, 'epoch': 2} {'type': 'loss', 'content': 0.07225657999515533, 'timestamp': '2025-10-01 04:23:29.259326', 'step': 8659, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:29.299180', 'step': 8659, 'epoch': 2} {'type': 'loss', 'content': 0.08983586728572845, 'timestamp': '2025-10-01 04:23:29.322737', 'step': 8660, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:29.355878', 'step': 8660, 'epoch': 2} {'type': 'loss', 'content': 0.03643909841775894, 'timestamp': '2025-10-01 04:23:29.357938', 'step': 8661, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:23:29.397263', 'step': 8661, 'epoch': 2} {'type': 'loss', 'content': 0.08473137021064758, 'timestamp': '2025-10-01 04:23:29.399207', 'step': 8662, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:23:29.430324', 'step': 8662, 'epoch': 2} {'type': 'loss', 'content': 0.10631809383630753, 'timestamp': '2025-10-01 04:23:29.443400', 'step': 8663, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:29.490666', 'step': 8663, 'epoch': 2} {'type': 'loss', 'content': 0.08949501067399979, 'timestamp': '2025-10-01 04:23:29.515745', 'step': 8664, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:29.546959', 'step': 8664, 'epoch': 2} {'type': 'loss', 'content': 0.10676902532577515, 'timestamp': '2025-10-01 04:23:29.548815', 'step': 8665, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:23:29.580013', 'step': 8665, 'epoch': 2} {'type': 'loss', 'content': 0.18885883688926697, 'timestamp': '2025-10-01 04:23:29.582220', 'step': 8666, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:29.616594', 'step': 8666, 'epoch': 2} {'type': 'loss', 'content': 0.09727606177330017, 'timestamp': '2025-10-01 04:23:29.618804', 'step': 8667, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:29.652373', 'step': 8667, 'epoch': 2} {'type': 'loss', 'content': 0.07084175944328308, 'timestamp': '2025-10-01 04:23:29.676240', 'step': 8668, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:23:29.707856', 'step': 8668, 'epoch': 2} {'type': 'loss', 'content': 0.08122263103723526, 'timestamp': '2025-10-01 04:23:29.710030', 'step': 8669, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:23:29.745764', 'step': 8669, 'epoch': 2} {'type': 'loss', 'content': 0.20631125569343567, 'timestamp': '2025-10-01 04:23:29.748228', 'step': 8670, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:29.781643', 'step': 8670, 'epoch': 2} {'type': 'loss', 'content': 0.08558455109596252, 'timestamp': '2025-10-01 04:23:29.783930', 'step': 8671, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-10-01 04:23:29.820867', 'step': 8671, 'epoch': 2} {'type': 'loss', 'content': 0.14689815044403076, 'timestamp': '2025-10-01 04:23:29.848925', 'step': 8672, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:23:29.885585', 'step': 8672, 'epoch': 2} {'type': 'loss', 'content': 0.09264234453439713, 'timestamp': '2025-10-01 04:23:29.887759', 'step': 8673, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:29.919509', 'step': 8673, 'epoch': 2} {'type': 'loss', 'content': 0.1023915484547615, 'timestamp': '2025-10-01 04:23:29.921493', 'step': 8674, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:29.960221', 'step': 8674, 'epoch': 2} {'type': 'loss', 'content': 0.13453476130962372, 'timestamp': '2025-10-01 04:23:29.962272', 'step': 8675, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:30.005868', 'step': 8675, 'epoch': 2} {'type': 'loss', 'content': 0.11652674525976181, 'timestamp': '2025-10-01 04:23:30.029985', 'step': 8676, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:30.069341', 'step': 8676, 'epoch': 2} {'type': 'loss', 'content': 0.11648344248533249, 'timestamp': '2025-10-01 04:23:30.071753', 'step': 8677, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:30.103406', 'step': 8677, 'epoch': 2} {'type': 'loss', 'content': 0.09709219634532928, 'timestamp': '2025-10-01 04:23:30.105397', 'step': 8678, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:23:30.136265', 'step': 8678, 'epoch': 2} {'type': 'loss', 'content': 0.10454757511615753, 'timestamp': '2025-10-01 04:23:30.138339', 'step': 8679, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:23:30.174425', 'step': 8679, 'epoch': 2} {'type': 'loss', 'content': 0.09233435988426208, 'timestamp': '2025-10-01 04:23:30.197960', 'step': 8680, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:30.229469', 'step': 8680, 'epoch': 2} {'type': 'loss', 'content': 0.16875116527080536, 'timestamp': '2025-10-01 04:23:30.231559', 'step': 8681, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:30.263099', 'step': 8681, 'epoch': 2} {'type': 'loss', 'content': 0.07883236557245255, 'timestamp': '2025-10-01 04:23:30.265084', 'step': 8682, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:30.300949', 'step': 8682, 'epoch': 2} {'type': 'loss', 'content': 0.11138025671243668, 'timestamp': '2025-10-01 04:23:30.303010', 'step': 8683, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:23:30.335641', 'step': 8683, 'epoch': 2} {'type': 'loss', 'content': 0.07415814697742462, 'timestamp': '2025-10-01 04:23:30.359374', 'step': 8684, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:30.392554', 'step': 8684, 'epoch': 2} {'type': 'loss', 'content': 0.10625305026769638, 'timestamp': '2025-10-01 04:23:30.394869', 'step': 8685, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:30.448815', 'step': 8685, 'epoch': 2} {'type': 'loss', 'content': 0.23790748417377472, 'timestamp': '2025-10-01 04:23:30.453976', 'step': 8686, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:30.491395', 'step': 8686, 'epoch': 2} {'type': 'loss', 'content': 0.14599287509918213, 'timestamp': '2025-10-01 04:23:30.494383', 'step': 8687, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:23:30.546617', 'step': 8687, 'epoch': 2} {'type': 'loss', 'content': 0.1453264057636261, 'timestamp': '2025-10-01 04:23:30.570997', 'step': 8688, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:30.612731', 'step': 8688, 'epoch': 2} {'type': 'loss', 'content': 0.11320718377828598, 'timestamp': '2025-10-01 04:23:30.617986', 'step': 8689, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:30.654343', 'step': 8689, 'epoch': 2} {'type': 'loss', 'content': 0.14491058886051178, 'timestamp': '2025-10-01 04:23:30.656244', 'step': 8690, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:23:30.689462', 'step': 8690, 'epoch': 2} {'type': 'loss', 'content': 0.20704995095729828, 'timestamp': '2025-10-01 04:23:30.692081', 'step': 8691, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:23:30.731995', 'step': 8691, 'epoch': 2} {'type': 'loss', 'content': 0.14633972942829132, 'timestamp': '2025-10-01 04:23:30.758981', 'step': 8692, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:30.790307', 'step': 8692, 'epoch': 2} {'type': 'loss', 'content': 0.1406809538602829, 'timestamp': '2025-10-01 04:23:30.792303', 'step': 8693, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:23:30.823525', 'step': 8693, 'epoch': 2} {'type': 'loss', 'content': 0.12261862307786942, 'timestamp': '2025-10-01 04:23:30.826033', 'step': 8694, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:30.867164', 'step': 8694, 'epoch': 2} {'type': 'loss', 'content': 0.07406400889158249, 'timestamp': '2025-10-01 04:23:30.869232', 'step': 8695, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:30.903788', 'step': 8695, 'epoch': 2} {'type': 'loss', 'content': 0.10438362509012222, 'timestamp': '2025-10-01 04:23:30.927636', 'step': 8696, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:23:30.962110', 'step': 8696, 'epoch': 2} {'type': 'loss', 'content': 0.15943095088005066, 'timestamp': '2025-10-01 04:23:30.964142', 'step': 8697, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-10-01 04:23:31.004123', 'step': 8697, 'epoch': 2} {'type': 'loss', 'content': 0.22633323073387146, 'timestamp': '2025-10-01 04:23:31.014811', 'step': 8698, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:31.051050', 'step': 8698, 'epoch': 2} {'type': 'loss', 'content': 0.10642168670892715, 'timestamp': '2025-10-01 04:23:31.053180', 'step': 8699, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:31.086740', 'step': 8699, 'epoch': 2} {'type': 'loss', 'content': 0.15686878561973572, 'timestamp': '2025-10-01 04:23:31.110676', 'step': 8700, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:31.146805', 'step': 8700, 'epoch': 2} {'type': 'loss', 'content': 0.19837625324726105, 'timestamp': '2025-10-01 04:23:31.148825', 'step': 8701, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:31.193392', 'step': 8701, 'epoch': 2} {'type': 'loss', 'content': 0.11108765751123428, 'timestamp': '2025-10-01 04:23:31.195354', 'step': 8702, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:31.228061', 'step': 8702, 'epoch': 2} {'type': 'loss', 'content': 0.09935446083545685, 'timestamp': '2025-10-01 04:23:31.230010', 'step': 8703, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:31.268097', 'step': 8703, 'epoch': 2} {'type': 'loss', 'content': 0.10310543328523636, 'timestamp': '2025-10-01 04:23:31.291647', 'step': 8704, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:31.323480', 'step': 8704, 'epoch': 2} {'type': 'loss', 'content': 0.13142479956150055, 'timestamp': '2025-10-01 04:23:31.325740', 'step': 8705, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:23:31.368278', 'step': 8705, 'epoch': 2} {'type': 'loss', 'content': 0.09143030643463135, 'timestamp': '2025-10-01 04:23:31.376463', 'step': 8706, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:31.416113', 'step': 8706, 'epoch': 2} {'type': 'loss', 'content': 0.10339350998401642, 'timestamp': '2025-10-01 04:23:31.421097', 'step': 8707, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:31.452690', 'step': 8707, 'epoch': 2} {'type': 'loss', 'content': 0.07401151955127716, 'timestamp': '2025-10-01 04:23:31.476242', 'step': 8708, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:31.511892', 'step': 8708, 'epoch': 2} {'type': 'loss', 'content': 0.12667356431484222, 'timestamp': '2025-10-01 04:23:31.513973', 'step': 8709, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:31.544996', 'step': 8709, 'epoch': 2} {'type': 'loss', 'content': 0.10338598489761353, 'timestamp': '2025-10-01 04:23:31.546944', 'step': 8710, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:23:31.579071', 'step': 8710, 'epoch': 2} {'type': 'loss', 'content': 0.08068592101335526, 'timestamp': '2025-10-01 04:23:31.581031', 'step': 8711, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:23:31.611980', 'step': 8711, 'epoch': 2} {'type': 'loss', 'content': 0.11157064884901047, 'timestamp': '2025-10-01 04:23:31.635615', 'step': 8712, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:31.667423', 'step': 8712, 'epoch': 2} {'type': 'loss', 'content': 0.11158173531293869, 'timestamp': '2025-10-01 04:23:31.669660', 'step': 8713, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:31.718049', 'step': 8713, 'epoch': 2} {'type': 'loss', 'content': 0.12120030075311661, 'timestamp': '2025-10-01 04:23:31.721575', 'step': 8714, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:31.762583', 'step': 8714, 'epoch': 2} {'type': 'loss', 'content': 0.12646354734897614, 'timestamp': '2025-10-01 04:23:31.764534', 'step': 8715, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:23:31.796027', 'step': 8715, 'epoch': 2} {'type': 'loss', 'content': 0.14062857627868652, 'timestamp': '2025-10-01 04:23:31.819224', 'step': 8716, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:31.860698', 'step': 8716, 'epoch': 2} {'type': 'loss', 'content': 0.10063070058822632, 'timestamp': '2025-10-01 04:23:31.863511', 'step': 8717, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:31.904347', 'step': 8717, 'epoch': 2} {'type': 'loss', 'content': 0.16497360169887543, 'timestamp': '2025-10-01 04:23:31.906439', 'step': 8718, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:31.941845', 'step': 8718, 'epoch': 2} {'type': 'loss', 'content': 0.1742313653230667, 'timestamp': '2025-10-01 04:23:31.943427', 'step': 8719, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:31.974906', 'step': 8719, 'epoch': 2} {'type': 'loss', 'content': 0.161682590842247, 'timestamp': '2025-10-01 04:23:31.998469', 'step': 8720, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:23:32.029257', 'step': 8720, 'epoch': 2} {'type': 'loss', 'content': 0.1099618524312973, 'timestamp': '2025-10-01 04:23:32.031210', 'step': 8721, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:23:32.063904', 'step': 8721, 'epoch': 2} {'type': 'loss', 'content': 0.2049436867237091, 'timestamp': '2025-10-01 04:23:32.066364', 'step': 8722, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:32.097928', 'step': 8722, 'epoch': 2} {'type': 'loss', 'content': 0.10256653279066086, 'timestamp': '2025-10-01 04:23:32.100013', 'step': 8723, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:32.132630', 'step': 8723, 'epoch': 2} {'type': 'loss', 'content': 0.07502254098653793, 'timestamp': '2025-10-01 04:23:32.156168', 'step': 8724, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:32.188300', 'step': 8724, 'epoch': 2} {'type': 'loss', 'content': 0.0893862172961235, 'timestamp': '2025-10-01 04:23:32.190281', 'step': 8725, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:32.221381', 'step': 8725, 'epoch': 2} {'type': 'loss', 'content': 0.09958471357822418, 'timestamp': '2025-10-01 04:23:32.223444', 'step': 8726, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:32.253978', 'step': 8726, 'epoch': 2} {'type': 'loss', 'content': 0.18822704255580902, 'timestamp': '2025-10-01 04:23:32.255888', 'step': 8727, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:32.294194', 'step': 8727, 'epoch': 2} {'type': 'loss', 'content': 0.03924373537302017, 'timestamp': '2025-10-01 04:23:32.317797', 'step': 8728, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:32.349368', 'step': 8728, 'epoch': 2} {'type': 'loss', 'content': 0.10118589550256729, 'timestamp': '2025-10-01 04:23:32.352376', 'step': 8729, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:23:32.387510', 'step': 8729, 'epoch': 2} {'type': 'loss', 'content': 0.13344933092594147, 'timestamp': '2025-10-01 04:23:32.389941', 'step': 8730, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:32.421391', 'step': 8730, 'epoch': 2} {'type': 'loss', 'content': 0.13906708359718323, 'timestamp': '2025-10-01 04:23:32.426116', 'step': 8731, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:23:32.465533', 'step': 8731, 'epoch': 2} {'type': 'loss', 'content': 0.08232270926237106, 'timestamp': '2025-10-01 04:23:32.492174', 'step': 8732, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:32.525115', 'step': 8732, 'epoch': 2} {'type': 'loss', 'content': 0.1413322389125824, 'timestamp': '2025-10-01 04:23:32.526854', 'step': 8733, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:32.567669', 'step': 8733, 'epoch': 2} {'type': 'loss', 'content': 0.0969393327832222, 'timestamp': '2025-10-01 04:23:32.569777', 'step': 8734, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:32.611686', 'step': 8734, 'epoch': 2} {'type': 'loss', 'content': 0.05856344848871231, 'timestamp': '2025-10-01 04:23:32.619169', 'step': 8735, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:32.665218', 'step': 8735, 'epoch': 2} {'type': 'loss', 'content': 0.14476251602172852, 'timestamp': '2025-10-01 04:23:32.688728', 'step': 8736, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:32.720415', 'step': 8736, 'epoch': 2} {'type': 'loss', 'content': 0.16145110130310059, 'timestamp': '2025-10-01 04:23:32.722495', 'step': 8737, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:32.761058', 'step': 8737, 'epoch': 2} {'type': 'loss', 'content': 0.14348235726356506, 'timestamp': '2025-10-01 04:23:32.763083', 'step': 8738, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:23:32.794098', 'step': 8738, 'epoch': 2} {'type': 'loss', 'content': 0.10849025845527649, 'timestamp': '2025-10-01 04:23:32.796145', 'step': 8739, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:32.827119', 'step': 8739, 'epoch': 2} {'type': 'loss', 'content': 0.12064015865325928, 'timestamp': '2025-10-01 04:23:32.851025', 'step': 8740, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:23:32.890238', 'step': 8740, 'epoch': 2} {'type': 'loss', 'content': 0.12147951126098633, 'timestamp': '2025-10-01 04:23:32.893715', 'step': 8741, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:32.924437', 'step': 8741, 'epoch': 2} {'type': 'loss', 'content': 0.09942997246980667, 'timestamp': '2025-10-01 04:23:32.927556', 'step': 8742, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:23:32.969887', 'step': 8742, 'epoch': 2} {'type': 'loss', 'content': 0.12462129443883896, 'timestamp': '2025-10-01 04:23:32.972541', 'step': 8743, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:33.003324', 'step': 8743, 'epoch': 2} {'type': 'loss', 'content': 0.11834908276796341, 'timestamp': '2025-10-01 04:23:33.026973', 'step': 8744, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:23:33.061687', 'step': 8744, 'epoch': 2} {'type': 'loss', 'content': 0.0737924799323082, 'timestamp': '2025-10-01 04:23:33.063744', 'step': 8745, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:33.094331', 'step': 8745, 'epoch': 2} {'type': 'loss', 'content': 0.15833958983421326, 'timestamp': '2025-10-01 04:23:33.096336', 'step': 8746, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:33.129111', 'step': 8746, 'epoch': 2} {'type': 'loss', 'content': 0.17229899764060974, 'timestamp': '2025-10-01 04:23:33.132673', 'step': 8747, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:33.164830', 'step': 8747, 'epoch': 2} {'type': 'loss', 'content': 0.10307588428258896, 'timestamp': '2025-10-01 04:23:33.191904', 'step': 8748, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:33.233448', 'step': 8748, 'epoch': 2} {'type': 'loss', 'content': 0.1338224560022354, 'timestamp': '2025-10-01 04:23:33.235258', 'step': 8749, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:23:33.266931', 'step': 8749, 'epoch': 2} {'type': 'loss', 'content': 0.12477855384349823, 'timestamp': '2025-10-01 04:23:33.271737', 'step': 8750, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:33.315135', 'step': 8750, 'epoch': 2} {'type': 'loss', 'content': 0.14346402883529663, 'timestamp': '2025-10-01 04:23:33.320776', 'step': 8751, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:33.355796', 'step': 8751, 'epoch': 2} {'type': 'loss', 'content': 0.061015743762254715, 'timestamp': '2025-10-01 04:23:33.379249', 'step': 8752, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:33.409881', 'step': 8752, 'epoch': 2} {'type': 'loss', 'content': 0.0825035497546196, 'timestamp': '2025-10-01 04:23:33.411859', 'step': 8753, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:33.442298', 'step': 8753, 'epoch': 2} {'type': 'loss', 'content': 0.09351927787065506, 'timestamp': '2025-10-01 04:23:33.444751', 'step': 8754, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:23:33.484646', 'step': 8754, 'epoch': 2} {'type': 'loss', 'content': 0.11528330296278, 'timestamp': '2025-10-01 04:23:33.486552', 'step': 8755, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:33.519706', 'step': 8755, 'epoch': 2} {'type': 'loss', 'content': 0.15694086253643036, 'timestamp': '2025-10-01 04:23:33.543254', 'step': 8756, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:33.576695', 'step': 8756, 'epoch': 2} {'type': 'loss', 'content': 0.07581902295351028, 'timestamp': '2025-10-01 04:23:33.579617', 'step': 8757, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:33.611407', 'step': 8757, 'epoch': 2} {'type': 'loss', 'content': 0.11460480093955994, 'timestamp': '2025-10-01 04:23:33.613635', 'step': 8758, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:33.646218', 'step': 8758, 'epoch': 2} {'type': 'loss', 'content': 0.10281994193792343, 'timestamp': '2025-10-01 04:23:33.648219', 'step': 8759, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:23:33.679692', 'step': 8759, 'epoch': 2} {'type': 'loss', 'content': 0.07454245537519455, 'timestamp': '2025-10-01 04:23:33.703351', 'step': 8760, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:33.734039', 'step': 8760, 'epoch': 2} {'type': 'loss', 'content': 0.20151419937610626, 'timestamp': '2025-10-01 04:23:33.735874', 'step': 8761, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:33.765978', 'step': 8761, 'epoch': 2} {'type': 'loss', 'content': 0.13032396137714386, 'timestamp': '2025-10-01 04:23:33.767942', 'step': 8762, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:33.798865', 'step': 8762, 'epoch': 2} {'type': 'loss', 'content': 0.18960830569267273, 'timestamp': '2025-10-01 04:23:33.800945', 'step': 8763, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:33.835921', 'step': 8763, 'epoch': 2} {'type': 'loss', 'content': 0.11891786009073257, 'timestamp': '2025-10-01 04:23:33.859451', 'step': 8764, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:23:33.889797', 'step': 8764, 'epoch': 2} {'type': 'loss', 'content': 0.11480046063661575, 'timestamp': '2025-10-01 04:23:33.891807', 'step': 8765, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:23:33.921878', 'step': 8765, 'epoch': 2} {'type': 'loss', 'content': 0.23332402110099792, 'timestamp': '2025-10-01 04:23:33.923728', 'step': 8766, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:33.958578', 'step': 8766, 'epoch': 2} {'type': 'loss', 'content': 0.13954511284828186, 'timestamp': '2025-10-01 04:23:33.960650', 'step': 8767, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:23:33.995774', 'step': 8767, 'epoch': 2} {'type': 'loss', 'content': 0.11557003110647202, 'timestamp': '2025-10-01 04:23:34.019630', 'step': 8768, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:34.050588', 'step': 8768, 'epoch': 2} {'type': 'loss', 'content': 0.05769848823547363, 'timestamp': '2025-10-01 04:23:34.052474', 'step': 8769, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:23:34.082597', 'step': 8769, 'epoch': 2} {'type': 'loss', 'content': 0.0890384316444397, 'timestamp': '2025-10-01 04:23:34.084508', 'step': 8770, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:34.114414', 'step': 8770, 'epoch': 2} {'type': 'loss', 'content': 0.1059158518910408, 'timestamp': '2025-10-01 04:23:34.116475', 'step': 8771, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:23:34.146147', 'step': 8771, 'epoch': 2} {'type': 'loss', 'content': 0.09938813745975494, 'timestamp': '2025-10-01 04:23:34.170110', 'step': 8772, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:34.206299', 'step': 8772, 'epoch': 2} {'type': 'loss', 'content': 0.11138980835676193, 'timestamp': '2025-10-01 04:23:34.208378', 'step': 8773, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:23:34.248634', 'step': 8773, 'epoch': 2} {'type': 'loss', 'content': 0.11130846291780472, 'timestamp': '2025-10-01 04:23:34.251174', 'step': 8774, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:23:34.282171', 'step': 8774, 'epoch': 2} {'type': 'loss', 'content': 0.07786889374256134, 'timestamp': '2025-10-01 04:23:34.284893', 'step': 8775, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:34.315227', 'step': 8775, 'epoch': 2} {'type': 'loss', 'content': 0.11206167191267014, 'timestamp': '2025-10-01 04:23:34.338672', 'step': 8776, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:34.369951', 'step': 8776, 'epoch': 2} {'type': 'loss', 'content': 0.06901107728481293, 'timestamp': '2025-10-01 04:23:34.371889', 'step': 8777, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:34.401989', 'step': 8777, 'epoch': 2} {'type': 'loss', 'content': 0.10293876379728317, 'timestamp': '2025-10-01 04:23:34.404187', 'step': 8778, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:23:34.434795', 'step': 8778, 'epoch': 2} {'type': 'loss', 'content': 0.12115215510129929, 'timestamp': '2025-10-01 04:23:34.437306', 'step': 8779, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:34.470348', 'step': 8779, 'epoch': 2} {'type': 'loss', 'content': 0.038678571581840515, 'timestamp': '2025-10-01 04:23:34.493954', 'step': 8780, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:23:34.524104', 'step': 8780, 'epoch': 2} {'type': 'loss', 'content': 0.1429043710231781, 'timestamp': '2025-10-01 04:23:34.526139', 'step': 8781, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:23:34.558229', 'step': 8781, 'epoch': 2} {'type': 'loss', 'content': 0.08477896451950073, 'timestamp': '2025-10-01 04:23:34.562796', 'step': 8782, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:34.595438', 'step': 8782, 'epoch': 2} {'type': 'loss', 'content': 0.0909653976559639, 'timestamp': '2025-10-01 04:23:34.597341', 'step': 8783, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:23:34.628629', 'step': 8783, 'epoch': 2} {'type': 'loss', 'content': 0.10121535509824753, 'timestamp': '2025-10-01 04:23:34.652389', 'step': 8784, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:23:34.682696', 'step': 8784, 'epoch': 2} {'type': 'loss', 'content': 0.16422899067401886, 'timestamp': '2025-10-01 04:23:34.684341', 'step': 8785, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:34.714162', 'step': 8785, 'epoch': 2} {'type': 'loss', 'content': 0.06291626393795013, 'timestamp': '2025-10-01 04:23:34.716316', 'step': 8786, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:34.746332', 'step': 8786, 'epoch': 2} {'type': 'loss', 'content': 0.11690845340490341, 'timestamp': '2025-10-01 04:23:34.749374', 'step': 8787, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:34.779344', 'step': 8787, 'epoch': 2} {'type': 'loss', 'content': 0.11329881846904755, 'timestamp': '2025-10-01 04:23:34.803291', 'step': 8788, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:34.834849', 'step': 8788, 'epoch': 2} {'type': 'loss', 'content': 0.11011815816164017, 'timestamp': '2025-10-01 04:23:34.837063', 'step': 8789, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:23:34.870523', 'step': 8789, 'epoch': 2} {'type': 'loss', 'content': 0.05896766856312752, 'timestamp': '2025-10-01 04:23:34.872749', 'step': 8790, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:34.903188', 'step': 8790, 'epoch': 2} {'type': 'loss', 'content': 0.08096973598003387, 'timestamp': '2025-10-01 04:23:34.914618', 'step': 8791, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:23:34.945412', 'step': 8791, 'epoch': 2} {'type': 'loss', 'content': 0.20789678394794464, 'timestamp': '2025-10-01 04:23:34.968888', 'step': 8792, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:34.999067', 'step': 8792, 'epoch': 2} {'type': 'loss', 'content': 0.12738870084285736, 'timestamp': '2025-10-01 04:23:35.001289', 'step': 8793, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:23:35.031381', 'step': 8793, 'epoch': 2} {'type': 'loss', 'content': 0.11763828247785568, 'timestamp': '2025-10-01 04:23:35.033996', 'step': 8794, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:35.065075', 'step': 8794, 'epoch': 2} {'type': 'loss', 'content': 0.14767855405807495, 'timestamp': '2025-10-01 04:23:35.067108', 'step': 8795, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:35.097602', 'step': 8795, 'epoch': 2} {'type': 'loss', 'content': 0.06930690258741379, 'timestamp': '2025-10-01 04:23:35.121345', 'step': 8796, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:23:35.151616', 'step': 8796, 'epoch': 2} {'type': 'loss', 'content': 0.07410162687301636, 'timestamp': '2025-10-01 04:23:35.153328', 'step': 8797, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:35.185344', 'step': 8797, 'epoch': 2} {'type': 'loss', 'content': 0.05751275271177292, 'timestamp': '2025-10-01 04:23:35.187122', 'step': 8798, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:23:35.216654', 'step': 8798, 'epoch': 2} {'type': 'loss', 'content': 0.11964941769838333, 'timestamp': '2025-10-01 04:23:35.218233', 'step': 8799, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:35.247908', 'step': 8799, 'epoch': 2} {'type': 'loss', 'content': 0.051213398575782776, 'timestamp': '2025-10-01 04:23:35.271688', 'step': 8800, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:35.301354', 'step': 8800, 'epoch': 2} {'type': 'loss', 'content': 0.13646642863750458, 'timestamp': '2025-10-01 04:23:35.303437', 'step': 8801, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:35.334193', 'step': 8801, 'epoch': 2} {'type': 'loss', 'content': 0.097313791513443, 'timestamp': '2025-10-01 04:23:35.336341', 'step': 8802, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:23:35.366855', 'step': 8802, 'epoch': 2} {'type': 'loss', 'content': 0.055590253323316574, 'timestamp': '2025-10-01 04:23:35.368927', 'step': 8803, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:35.401177', 'step': 8803, 'epoch': 2} {'type': 'loss', 'content': 0.06569451838731766, 'timestamp': '2025-10-01 04:23:35.424598', 'step': 8804, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:35.455442', 'step': 8804, 'epoch': 2} {'type': 'loss', 'content': 0.16695959866046906, 'timestamp': '2025-10-01 04:23:35.457320', 'step': 8805, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:35.492152', 'step': 8805, 'epoch': 2} {'type': 'loss', 'content': 0.09120697528123856, 'timestamp': '2025-10-01 04:23:35.494084', 'step': 8806, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:23:35.531496', 'step': 8806, 'epoch': 2} {'type': 'loss', 'content': 0.11204665899276733, 'timestamp': '2025-10-01 04:23:35.533425', 'step': 8807, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:35.566804', 'step': 8807, 'epoch': 2} {'type': 'loss', 'content': 0.1891276091337204, 'timestamp': '2025-10-01 04:23:35.590193', 'step': 8808, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:35.621214', 'step': 8808, 'epoch': 2} {'type': 'loss', 'content': 0.18352723121643066, 'timestamp': '2025-10-01 04:23:35.623074', 'step': 8809, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:23:35.654658', 'step': 8809, 'epoch': 2} {'type': 'loss', 'content': 0.08886144310235977, 'timestamp': '2025-10-01 04:23:35.657069', 'step': 8810, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:35.690339', 'step': 8810, 'epoch': 2} {'type': 'loss', 'content': 0.11661302298307419, 'timestamp': '2025-10-01 04:23:35.692280', 'step': 8811, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:23:35.728747', 'step': 8811, 'epoch': 2} {'type': 'loss', 'content': 0.07171811908483505, 'timestamp': '2025-10-01 04:23:35.755520', 'step': 8812, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:35.786101', 'step': 8812, 'epoch': 2} {'type': 'loss', 'content': 0.11969663947820663, 'timestamp': '2025-10-01 04:23:35.788023', 'step': 8813, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:35.819927', 'step': 8813, 'epoch': 2} {'type': 'loss', 'content': 0.11211594194173813, 'timestamp': '2025-10-01 04:23:35.821921', 'step': 8814, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:35.852587', 'step': 8814, 'epoch': 2} {'type': 'loss', 'content': 0.191426083445549, 'timestamp': '2025-10-01 04:23:35.854756', 'step': 8815, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:35.885539', 'step': 8815, 'epoch': 2} {'type': 'loss', 'content': 0.09153961390256882, 'timestamp': '2025-10-01 04:23:35.908986', 'step': 8816, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:23:35.942185', 'step': 8816, 'epoch': 2} {'type': 'loss', 'content': 0.1361824870109558, 'timestamp': '2025-10-01 04:23:35.944759', 'step': 8817, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:35.979505', 'step': 8817, 'epoch': 2} {'type': 'loss', 'content': 0.18073277175426483, 'timestamp': '2025-10-01 04:23:35.981564', 'step': 8818, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:23:36.014124', 'step': 8818, 'epoch': 2} {'type': 'loss', 'content': 0.17623090744018555, 'timestamp': '2025-10-01 04:23:36.019447', 'step': 8819, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:36.055893', 'step': 8819, 'epoch': 2} {'type': 'loss', 'content': 0.18033529818058014, 'timestamp': '2025-10-01 04:23:36.079434', 'step': 8820, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:36.111878', 'step': 8820, 'epoch': 2} {'type': 'loss', 'content': 0.1106041967868805, 'timestamp': '2025-10-01 04:23:36.114146', 'step': 8821, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:36.146714', 'step': 8821, 'epoch': 2} {'type': 'loss', 'content': 0.05451415106654167, 'timestamp': '2025-10-01 04:23:36.152722', 'step': 8822, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:23:36.185693', 'step': 8822, 'epoch': 2} {'type': 'loss', 'content': 0.09584464132785797, 'timestamp': '2025-10-01 04:23:36.188545', 'step': 8823, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:36.221151', 'step': 8823, 'epoch': 2} {'type': 'loss', 'content': 0.15543751418590546, 'timestamp': '2025-10-01 04:23:36.244627', 'step': 8824, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:36.286865', 'step': 8824, 'epoch': 2} {'type': 'loss', 'content': 0.06490888446569443, 'timestamp': '2025-10-01 04:23:36.288969', 'step': 8825, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:23:36.323061', 'step': 8825, 'epoch': 2} {'type': 'loss', 'content': 0.09710927307605743, 'timestamp': '2025-10-01 04:23:36.326000', 'step': 8826, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:36.357512', 'step': 8826, 'epoch': 2} {'type': 'loss', 'content': 0.128012016415596, 'timestamp': '2025-10-01 04:23:36.359780', 'step': 8827, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:36.395185', 'step': 8827, 'epoch': 2} {'type': 'loss', 'content': 0.05184914544224739, 'timestamp': '2025-10-01 04:23:36.422834', 'step': 8828, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:36.453477', 'step': 8828, 'epoch': 2} {'type': 'loss', 'content': 0.11825517565011978, 'timestamp': '2025-10-01 04:23:36.456095', 'step': 8829, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:23:36.493350', 'step': 8829, 'epoch': 2} {'type': 'loss', 'content': 0.11672000586986542, 'timestamp': '2025-10-01 04:23:36.495519', 'step': 8830, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:36.526799', 'step': 8830, 'epoch': 2} {'type': 'loss', 'content': 0.16286656260490417, 'timestamp': '2025-10-01 04:23:36.529070', 'step': 8831, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:23:36.562074', 'step': 8831, 'epoch': 2} {'type': 'loss', 'content': 0.08892830461263657, 'timestamp': '2025-10-01 04:23:36.585651', 'step': 8832, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:36.619681', 'step': 8832, 'epoch': 2} {'type': 'loss', 'content': 0.024706976488232613, 'timestamp': '2025-10-01 04:23:36.621525', 'step': 8833, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:36.653365', 'step': 8833, 'epoch': 2} {'type': 'loss', 'content': 0.1608525514602661, 'timestamp': '2025-10-01 04:23:36.655467', 'step': 8834, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:23:36.688126', 'step': 8834, 'epoch': 2} {'type': 'loss', 'content': 0.09079266339540482, 'timestamp': '2025-10-01 04:23:36.692564', 'step': 8835, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:36.725163', 'step': 8835, 'epoch': 2} {'type': 'loss', 'content': 0.18521004915237427, 'timestamp': '2025-10-01 04:23:36.750444', 'step': 8836, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:23:36.784506', 'step': 8836, 'epoch': 2} {'type': 'loss', 'content': 0.09737731516361237, 'timestamp': '2025-10-01 04:23:36.786660', 'step': 8837, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:36.826081', 'step': 8837, 'epoch': 2} {'type': 'loss', 'content': 0.24869324266910553, 'timestamp': '2025-10-01 04:23:36.828090', 'step': 8838, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:23:36.860984', 'step': 8838, 'epoch': 2} {'type': 'loss', 'content': 0.11597917973995209, 'timestamp': '2025-10-01 04:23:36.863068', 'step': 8839, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:36.894919', 'step': 8839, 'epoch': 2} {'type': 'loss', 'content': 0.09322470426559448, 'timestamp': '2025-10-01 04:23:36.919310', 'step': 8840, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:23:36.954910', 'step': 8840, 'epoch': 2} {'type': 'loss', 'content': 0.10083255171775818, 'timestamp': '2025-10-01 04:23:36.956879', 'step': 8841, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:23:36.988620', 'step': 8841, 'epoch': 2} {'type': 'loss', 'content': 0.08569949120283127, 'timestamp': '2025-10-01 04:23:36.990872', 'step': 8842, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:37.023618', 'step': 8842, 'epoch': 2} {'type': 'loss', 'content': 0.0966954380273819, 'timestamp': '2025-10-01 04:23:37.025977', 'step': 8843, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:37.057611', 'step': 8843, 'epoch': 2} {'type': 'loss', 'content': 0.06485885381698608, 'timestamp': '2025-10-01 04:23:37.081852', 'step': 8844, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:37.116625', 'step': 8844, 'epoch': 2} {'type': 'loss', 'content': 0.13440601527690887, 'timestamp': '2025-10-01 04:23:37.119251', 'step': 8845, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:23:37.152105', 'step': 8845, 'epoch': 2} {'type': 'loss', 'content': 0.13989397883415222, 'timestamp': '2025-10-01 04:23:37.154377', 'step': 8846, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:23:37.186247', 'step': 8846, 'epoch': 2} {'type': 'loss', 'content': 0.0838952511548996, 'timestamp': '2025-10-01 04:23:37.189367', 'step': 8847, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:37.221332', 'step': 8847, 'epoch': 2} {'type': 'loss', 'content': 0.09569957852363586, 'timestamp': '2025-10-01 04:23:37.245035', 'step': 8848, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:37.280855', 'step': 8848, 'epoch': 2} {'type': 'loss', 'content': 0.15323522686958313, 'timestamp': '2025-10-01 04:23:37.283170', 'step': 8849, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:37.316855', 'step': 8849, 'epoch': 2} {'type': 'loss', 'content': 0.12408697605133057, 'timestamp': '2025-10-01 04:23:37.319148', 'step': 8850, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:23:37.353426', 'step': 8850, 'epoch': 2} {'type': 'loss', 'content': 0.1326947659254074, 'timestamp': '2025-10-01 04:23:37.355877', 'step': 8851, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:23:37.385865', 'step': 8851, 'epoch': 2} {'type': 'loss', 'content': 0.04109076410531998, 'timestamp': '2025-10-01 04:23:37.410850', 'step': 8852, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:37.441082', 'step': 8852, 'epoch': 2} {'type': 'loss', 'content': 0.12319692224264145, 'timestamp': '2025-10-01 04:23:37.443085', 'step': 8853, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:37.474281', 'step': 8853, 'epoch': 2} {'type': 'loss', 'content': 0.12130335718393326, 'timestamp': '2025-10-01 04:23:37.476406', 'step': 8854, 'epoch': 2} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 949202279808}], 'timestamp': '2025-10-01 04:23:45.957589', 'step': 8854, 'epoch': 2} {'type': 'pplx', 'content': 12459.14551356396, 'timestamp': '2025-10-01 04:23:45.960151', 'step': 8854, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:45.990201', 'step': 8854, 'epoch': 2} {'type': 'loss', 'content': 0.10345783829689026, 'timestamp': '2025-10-01 04:23:45.992154', 'step': 8855, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:46.044402', 'step': 8855, 'epoch': 2} {'type': 'loss', 'content': 0.22598494589328766, 'timestamp': '2025-10-01 04:23:46.067945', 'step': 8856, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:23:46.098116', 'step': 8856, 'epoch': 2} {'type': 'loss', 'content': 0.12523983418941498, 'timestamp': '2025-10-01 04:23:46.100273', 'step': 8857, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:46.130631', 'step': 8857, 'epoch': 2} {'type': 'loss', 'content': 0.06363078951835632, 'timestamp': '2025-10-01 04:23:46.134027', 'step': 8858, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:46.166360', 'step': 8858, 'epoch': 2} {'type': 'loss', 'content': 0.14085297286510468, 'timestamp': '2025-10-01 04:23:46.174265', 'step': 8859, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:23:46.204623', 'step': 8859, 'epoch': 2} {'type': 'loss', 'content': 0.09755382686853409, 'timestamp': '2025-10-01 04:23:46.228164', 'step': 8860, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:23:46.260062', 'step': 8860, 'epoch': 2} {'type': 'loss', 'content': 0.13206395506858826, 'timestamp': '2025-10-01 04:23:46.261970', 'step': 8861, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:23:46.296186', 'step': 8861, 'epoch': 2} {'type': 'loss', 'content': 0.10065058618783951, 'timestamp': '2025-10-01 04:23:46.304129', 'step': 8862, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:23:46.336357', 'step': 8862, 'epoch': 2} {'type': 'loss', 'content': 0.04855739325284958, 'timestamp': '2025-10-01 04:23:46.339214', 'step': 8863, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:46.370528', 'step': 8863, 'epoch': 2} {'type': 'loss', 'content': 0.10987304896116257, 'timestamp': '2025-10-01 04:23:46.394772', 'step': 8864, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:23:46.425937', 'step': 8864, 'epoch': 2} {'type': 'loss', 'content': 0.12649698555469513, 'timestamp': '2025-10-01 04:23:46.428324', 'step': 8865, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:23:46.467593', 'step': 8865, 'epoch': 2} {'type': 'loss', 'content': 0.08426196128129959, 'timestamp': '2025-10-01 04:23:46.470351', 'step': 8866, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:23:46.501190', 'step': 8866, 'epoch': 2} {'type': 'loss', 'content': 0.13033153116703033, 'timestamp': '2025-10-01 04:23:46.503414', 'step': 8867, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:23:46.542995', 'step': 8867, 'epoch': 2} {'type': 'loss', 'content': 0.19185568392276764, 'timestamp': '2025-10-01 04:23:46.567028', 'step': 8868, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:46.599843', 'step': 8868, 'epoch': 2} {'type': 'loss', 'content': 0.08638229966163635, 'timestamp': '2025-10-01 04:23:46.602273', 'step': 8869, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:46.633627', 'step': 8869, 'epoch': 2} {'type': 'loss', 'content': 0.11531879007816315, 'timestamp': '2025-10-01 04:23:46.635622', 'step': 8870, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:46.666351', 'step': 8870, 'epoch': 2} {'type': 'loss', 'content': 0.11409790813922882, 'timestamp': '2025-10-01 04:23:46.668619', 'step': 8871, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:46.699501', 'step': 8871, 'epoch': 2} {'type': 'loss', 'content': 0.06324128806591034, 'timestamp': '2025-10-01 04:23:46.723057', 'step': 8872, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:23:46.753282', 'step': 8872, 'epoch': 2} {'type': 'loss', 'content': 0.16566631197929382, 'timestamp': '2025-10-01 04:23:46.755745', 'step': 8873, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:23:46.786272', 'step': 8873, 'epoch': 2} {'type': 'loss', 'content': 0.11555444449186325, 'timestamp': '2025-10-01 04:23:46.788629', 'step': 8874, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:23:46.819436', 'step': 8874, 'epoch': 2} {'type': 'loss', 'content': 0.14087916910648346, 'timestamp': '2025-10-01 04:23:46.821442', 'step': 8875, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:23:46.853861', 'step': 8875, 'epoch': 2} {'type': 'loss', 'content': 0.1026686280965805, 'timestamp': '2025-10-01 04:23:46.877638', 'step': 8876, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:46.909583', 'step': 8876, 'epoch': 2} {'type': 'loss', 'content': 0.23221765458583832, 'timestamp': '2025-10-01 04:23:46.911411', 'step': 8877, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:23:46.943691', 'step': 8877, 'epoch': 2} {'type': 'loss', 'content': 0.13898225128650665, 'timestamp': '2025-10-01 04:23:46.954509', 'step': 8878, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:46.984856', 'step': 8878, 'epoch': 2} {'type': 'loss', 'content': 0.1265588402748108, 'timestamp': '2025-10-01 04:23:46.986649', 'step': 8879, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:23:47.016626', 'step': 8879, 'epoch': 2} {'type': 'loss', 'content': 0.19965852797031403, 'timestamp': '2025-10-01 04:23:47.040447', 'step': 8880, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:23:47.070267', 'step': 8880, 'epoch': 2} {'type': 'loss', 'content': 0.06535404175519943, 'timestamp': '2025-10-01 04:23:47.072516', 'step': 8881, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:23:47.103128', 'step': 8881, 'epoch': 2} {'type': 'loss', 'content': 0.16762197017669678, 'timestamp': '2025-10-01 04:23:47.112201', 'step': 8882, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:47.144538', 'step': 8882, 'epoch': 2} {'type': 'loss', 'content': 0.09969744831323624, 'timestamp': '2025-10-01 04:23:47.147177', 'step': 8883, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:47.178206', 'step': 8883, 'epoch': 2} {'type': 'loss', 'content': 0.14520621299743652, 'timestamp': '2025-10-01 04:23:47.202118', 'step': 8884, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:47.236139', 'step': 8884, 'epoch': 2} {'type': 'loss', 'content': 0.1309579759836197, 'timestamp': '2025-10-01 04:23:47.238507', 'step': 8885, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:23:47.271065', 'step': 8885, 'epoch': 2} {'type': 'loss', 'content': 0.21275368332862854, 'timestamp': '2025-10-01 04:23:47.273244', 'step': 8886, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:23:47.305367', 'step': 8886, 'epoch': 2} {'type': 'loss', 'content': 0.10136346518993378, 'timestamp': '2025-10-01 04:23:47.308091', 'step': 8887, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:47.340395', 'step': 8887, 'epoch': 2} {'type': 'loss', 'content': 0.09833846986293793, 'timestamp': '2025-10-01 04:23:47.363954', 'step': 8888, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:47.405323', 'step': 8888, 'epoch': 2} {'type': 'loss', 'content': 0.057349566370248795, 'timestamp': '2025-10-01 04:23:47.407538', 'step': 8889, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:23:47.447194', 'step': 8889, 'epoch': 2} {'type': 'loss', 'content': 0.14526429772377014, 'timestamp': '2025-10-01 04:23:47.449437', 'step': 8890, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:47.479099', 'step': 8890, 'epoch': 2} {'type': 'loss', 'content': 0.12184885889291763, 'timestamp': '2025-10-01 04:23:47.481024', 'step': 8891, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:23:47.510284', 'step': 8891, 'epoch': 2} {'type': 'loss', 'content': 0.15115903317928314, 'timestamp': '2025-10-01 04:23:47.534399', 'step': 8892, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:47.578670', 'step': 8892, 'epoch': 2} {'type': 'loss', 'content': 0.08273782581090927, 'timestamp': '2025-10-01 04:23:47.580385', 'step': 8893, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:23:47.611074', 'step': 8893, 'epoch': 2} {'type': 'loss', 'content': 0.1559804379940033, 'timestamp': '2025-10-01 04:23:47.613415', 'step': 8894, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:47.650126', 'step': 8894, 'epoch': 2} {'type': 'loss', 'content': 0.0624290332198143, 'timestamp': '2025-10-01 04:23:47.652775', 'step': 8895, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:47.684244', 'step': 8895, 'epoch': 2} {'type': 'loss', 'content': 0.07296434044837952, 'timestamp': '2025-10-01 04:23:47.708260', 'step': 8896, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:47.738323', 'step': 8896, 'epoch': 2} {'type': 'loss', 'content': 0.1315327137708664, 'timestamp': '2025-10-01 04:23:47.740579', 'step': 8897, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:47.779406', 'step': 8897, 'epoch': 2} {'type': 'loss', 'content': 0.10913815349340439, 'timestamp': '2025-10-01 04:23:47.781760', 'step': 8898, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:23:47.812124', 'step': 8898, 'epoch': 2} {'type': 'loss', 'content': 0.09237957745790482, 'timestamp': '2025-10-01 04:23:47.814978', 'step': 8899, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:23:47.845831', 'step': 8899, 'epoch': 2} {'type': 'loss', 'content': 0.11789941787719727, 'timestamp': '2025-10-01 04:23:47.868965', 'step': 8900, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:47.899415', 'step': 8900, 'epoch': 2} {'type': 'loss', 'content': 0.08475127816200256, 'timestamp': '2025-10-01 04:23:47.901521', 'step': 8901, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:47.931527', 'step': 8901, 'epoch': 2} {'type': 'loss', 'content': 0.16509674489498138, 'timestamp': '2025-10-01 04:23:47.933577', 'step': 8902, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:47.962936', 'step': 8902, 'epoch': 2} {'type': 'loss', 'content': 0.1538289338350296, 'timestamp': '2025-10-01 04:23:47.964946', 'step': 8903, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:47.996622', 'step': 8903, 'epoch': 2} {'type': 'loss', 'content': 0.18240034580230713, 'timestamp': '2025-10-01 04:23:48.019944', 'step': 8904, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:48.051509', 'step': 8904, 'epoch': 2} {'type': 'loss', 'content': 0.2660641074180603, 'timestamp': '2025-10-01 04:23:48.053645', 'step': 8905, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:23:48.085194', 'step': 8905, 'epoch': 2} {'type': 'loss', 'content': 0.08314402401447296, 'timestamp': '2025-10-01 04:23:48.087293', 'step': 8906, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:23:48.117550', 'step': 8906, 'epoch': 2} {'type': 'loss', 'content': 0.12184230238199234, 'timestamp': '2025-10-01 04:23:48.120442', 'step': 8907, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:48.151144', 'step': 8907, 'epoch': 2} {'type': 'loss', 'content': 0.1857985407114029, 'timestamp': '2025-10-01 04:23:48.174394', 'step': 8908, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:48.206419', 'step': 8908, 'epoch': 2} {'type': 'loss', 'content': 0.0899762213230133, 'timestamp': '2025-10-01 04:23:48.208628', 'step': 8909, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:23:48.241317', 'step': 8909, 'epoch': 2} {'type': 'loss', 'content': 0.13458152115345, 'timestamp': '2025-10-01 04:23:48.245304', 'step': 8910, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:48.277923', 'step': 8910, 'epoch': 2} {'type': 'loss', 'content': 0.1795790046453476, 'timestamp': '2025-10-01 04:23:48.280190', 'step': 8911, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:48.314080', 'step': 8911, 'epoch': 2} {'type': 'loss', 'content': 0.09582635015249252, 'timestamp': '2025-10-01 04:23:48.337631', 'step': 8912, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:48.368591', 'step': 8912, 'epoch': 2} {'type': 'loss', 'content': 0.08632412552833557, 'timestamp': '2025-10-01 04:23:48.373822', 'step': 8913, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:48.403780', 'step': 8913, 'epoch': 2} {'type': 'loss', 'content': 0.05097918212413788, 'timestamp': '2025-10-01 04:23:48.414999', 'step': 8914, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:48.445338', 'step': 8914, 'epoch': 2} {'type': 'loss', 'content': 0.18710774183273315, 'timestamp': '2025-10-01 04:23:48.447458', 'step': 8915, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:48.477625', 'step': 8915, 'epoch': 2} {'type': 'loss', 'content': 0.040194615721702576, 'timestamp': '2025-10-01 04:23:48.501094', 'step': 8916, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:48.531037', 'step': 8916, 'epoch': 2} {'type': 'loss', 'content': 0.04824256896972656, 'timestamp': '2025-10-01 04:23:48.533166', 'step': 8917, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:48.565825', 'step': 8917, 'epoch': 2} {'type': 'loss', 'content': 0.07129910588264465, 'timestamp': '2025-10-01 04:23:48.567881', 'step': 8918, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:48.599480', 'step': 8918, 'epoch': 2} {'type': 'loss', 'content': 0.1034633070230484, 'timestamp': '2025-10-01 04:23:48.601499', 'step': 8919, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:48.634293', 'step': 8919, 'epoch': 2} {'type': 'loss', 'content': 0.11804089695215225, 'timestamp': '2025-10-01 04:23:48.657860', 'step': 8920, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:23:48.688930', 'step': 8920, 'epoch': 2} {'type': 'loss', 'content': 0.09325628727674484, 'timestamp': '2025-10-01 04:23:48.690992', 'step': 8921, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:48.721131', 'step': 8921, 'epoch': 2} {'type': 'loss', 'content': 0.13270416855812073, 'timestamp': '2025-10-01 04:23:48.723168', 'step': 8922, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:48.754560', 'step': 8922, 'epoch': 2} {'type': 'loss', 'content': 0.20302660763263702, 'timestamp': '2025-10-01 04:23:48.756646', 'step': 8923, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:48.787395', 'step': 8923, 'epoch': 2} {'type': 'loss', 'content': 0.14499253034591675, 'timestamp': '2025-10-01 04:23:48.811085', 'step': 8924, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:23:48.841554', 'step': 8924, 'epoch': 2} {'type': 'loss', 'content': 0.10422112047672272, 'timestamp': '2025-10-01 04:23:48.843811', 'step': 8925, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:48.875982', 'step': 8925, 'epoch': 2} {'type': 'loss', 'content': 0.07201245427131653, 'timestamp': '2025-10-01 04:23:48.885518', 'step': 8926, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:48.915876', 'step': 8926, 'epoch': 2} {'type': 'loss', 'content': 0.07392963021993637, 'timestamp': '2025-10-01 04:23:48.917954', 'step': 8927, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:48.955690', 'step': 8927, 'epoch': 2} {'type': 'loss', 'content': 0.22787272930145264, 'timestamp': '2025-10-01 04:23:48.979655', 'step': 8928, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:49.011083', 'step': 8928, 'epoch': 2} {'type': 'loss', 'content': 0.21656249463558197, 'timestamp': '2025-10-01 04:23:49.013180', 'step': 8929, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:49.043560', 'step': 8929, 'epoch': 2} {'type': 'loss', 'content': 0.17016568779945374, 'timestamp': '2025-10-01 04:23:49.045710', 'step': 8930, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:23:49.076494', 'step': 8930, 'epoch': 2} {'type': 'loss', 'content': 0.08799602091312408, 'timestamp': '2025-10-01 04:23:49.078547', 'step': 8931, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:49.108734', 'step': 8931, 'epoch': 2} {'type': 'loss', 'content': 0.07445869594812393, 'timestamp': '2025-10-01 04:23:49.132413', 'step': 8932, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:49.163027', 'step': 8932, 'epoch': 2} {'type': 'loss', 'content': 0.04753737524151802, 'timestamp': '2025-10-01 04:23:49.165186', 'step': 8933, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:23:49.195212', 'step': 8933, 'epoch': 2} {'type': 'loss', 'content': 0.12697899341583252, 'timestamp': '2025-10-01 04:23:49.197351', 'step': 8934, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:23:49.230257', 'step': 8934, 'epoch': 2} {'type': 'loss', 'content': 0.08470513671636581, 'timestamp': '2025-10-01 04:23:49.232395', 'step': 8935, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:49.273781', 'step': 8935, 'epoch': 2} {'type': 'loss', 'content': 0.05644720792770386, 'timestamp': '2025-10-01 04:23:49.296997', 'step': 8936, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:49.326956', 'step': 8936, 'epoch': 2} {'type': 'loss', 'content': 0.17439092695713043, 'timestamp': '2025-10-01 04:23:49.329082', 'step': 8937, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:23:49.360146', 'step': 8937, 'epoch': 2} {'type': 'loss', 'content': 0.03794971480965614, 'timestamp': '2025-10-01 04:23:49.362310', 'step': 8938, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:49.393063', 'step': 8938, 'epoch': 2} {'type': 'loss', 'content': 0.14047639071941376, 'timestamp': '2025-10-01 04:23:49.395413', 'step': 8939, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:49.425339', 'step': 8939, 'epoch': 2} {'type': 'loss', 'content': 0.12894457578659058, 'timestamp': '2025-10-01 04:23:49.449832', 'step': 8940, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:23:49.487629', 'step': 8940, 'epoch': 2} {'type': 'loss', 'content': 0.08747892826795578, 'timestamp': '2025-10-01 04:23:49.490062', 'step': 8941, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:49.521269', 'step': 8941, 'epoch': 2} {'type': 'loss', 'content': 0.10789047926664352, 'timestamp': '2025-10-01 04:23:49.523356', 'step': 8942, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:23:49.554900', 'step': 8942, 'epoch': 2} {'type': 'loss', 'content': 0.10498371720314026, 'timestamp': '2025-10-01 04:23:49.557060', 'step': 8943, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:49.596515', 'step': 8943, 'epoch': 2} {'type': 'loss', 'content': 0.08278381079435349, 'timestamp': '2025-10-01 04:23:49.620135', 'step': 8944, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:49.651039', 'step': 8944, 'epoch': 2} {'type': 'loss', 'content': 0.1492161899805069, 'timestamp': '2025-10-01 04:23:49.653124', 'step': 8945, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:49.683324', 'step': 8945, 'epoch': 2} {'type': 'loss', 'content': 0.09496961534023285, 'timestamp': '2025-10-01 04:23:49.685636', 'step': 8946, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:23:49.716827', 'step': 8946, 'epoch': 2} {'type': 'loss', 'content': 0.08173523843288422, 'timestamp': '2025-10-01 04:23:49.718909', 'step': 8947, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:23:49.749428', 'step': 8947, 'epoch': 2} {'type': 'loss', 'content': 0.13299192488193512, 'timestamp': '2025-10-01 04:23:49.773175', 'step': 8948, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:23:49.804814', 'step': 8948, 'epoch': 2} {'type': 'loss', 'content': 0.12270796298980713, 'timestamp': '2025-10-01 04:23:49.806893', 'step': 8949, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:23:49.837812', 'step': 8949, 'epoch': 2} {'type': 'loss', 'content': 0.16310752928256989, 'timestamp': '2025-10-01 04:23:49.839893', 'step': 8950, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:23:49.870607', 'step': 8950, 'epoch': 2} {'type': 'loss', 'content': 0.0915084183216095, 'timestamp': '2025-10-01 04:23:49.872921', 'step': 8951, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:49.903680', 'step': 8951, 'epoch': 2} {'type': 'loss', 'content': 0.06952092796564102, 'timestamp': '2025-10-01 04:23:49.927706', 'step': 8952, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:49.958008', 'step': 8952, 'epoch': 2} {'type': 'loss', 'content': 0.10794419795274734, 'timestamp': '2025-10-01 04:23:49.960187', 'step': 8953, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:49.993978', 'step': 8953, 'epoch': 2} {'type': 'loss', 'content': 0.0781678780913353, 'timestamp': '2025-10-01 04:23:49.996133', 'step': 8954, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:23:50.027111', 'step': 8954, 'epoch': 2} {'type': 'loss', 'content': 0.11047589033842087, 'timestamp': '2025-10-01 04:23:50.029395', 'step': 8955, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:50.059917', 'step': 8955, 'epoch': 2} {'type': 'loss', 'content': 0.24915051460266113, 'timestamp': '2025-10-01 04:23:50.083746', 'step': 8956, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:50.113558', 'step': 8956, 'epoch': 2} {'type': 'loss', 'content': 0.12894070148468018, 'timestamp': '2025-10-01 04:23:50.116525', 'step': 8957, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:23:50.149859', 'step': 8957, 'epoch': 2} {'type': 'loss', 'content': 0.09907737374305725, 'timestamp': '2025-10-01 04:23:50.153663', 'step': 8958, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:23:50.184724', 'step': 8958, 'epoch': 2} {'type': 'loss', 'content': 0.07357003539800644, 'timestamp': '2025-10-01 04:23:50.186979', 'step': 8959, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:50.218267', 'step': 8959, 'epoch': 2} {'type': 'loss', 'content': 0.10470066219568253, 'timestamp': '2025-10-01 04:23:50.241889', 'step': 8960, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:50.282448', 'step': 8960, 'epoch': 2} {'type': 'loss', 'content': 0.15477342903614044, 'timestamp': '2025-10-01 04:23:50.284635', 'step': 8961, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:50.320895', 'step': 8961, 'epoch': 2} {'type': 'loss', 'content': 0.14876198768615723, 'timestamp': '2025-10-01 04:23:50.323211', 'step': 8962, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:50.354417', 'step': 8962, 'epoch': 2} {'type': 'loss', 'content': 0.11081531643867493, 'timestamp': '2025-10-01 04:23:50.356789', 'step': 8963, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:50.387536', 'step': 8963, 'epoch': 2} {'type': 'loss', 'content': 0.08339638262987137, 'timestamp': '2025-10-01 04:23:50.418284', 'step': 8964, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:50.460996', 'step': 8964, 'epoch': 2} {'type': 'loss', 'content': 0.13243263959884644, 'timestamp': '2025-10-01 04:23:50.463212', 'step': 8965, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:50.494346', 'step': 8965, 'epoch': 2} {'type': 'loss', 'content': 0.11337575316429138, 'timestamp': '2025-10-01 04:23:50.500448', 'step': 8966, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:50.531341', 'step': 8966, 'epoch': 2} {'type': 'loss', 'content': 0.04789215698838234, 'timestamp': '2025-10-01 04:23:50.535186', 'step': 8967, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:50.566617', 'step': 8967, 'epoch': 2} {'type': 'loss', 'content': 0.11039604991674423, 'timestamp': '2025-10-01 04:23:50.591539', 'step': 8968, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:23:50.625068', 'step': 8968, 'epoch': 2} {'type': 'loss', 'content': 0.11974212527275085, 'timestamp': '2025-10-01 04:23:50.627881', 'step': 8969, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:50.659245', 'step': 8969, 'epoch': 2} {'type': 'loss', 'content': 0.09501833468675613, 'timestamp': '2025-10-01 04:23:50.661533', 'step': 8970, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:23:50.692451', 'step': 8970, 'epoch': 2} {'type': 'loss', 'content': 0.07714784890413284, 'timestamp': '2025-10-01 04:23:50.696654', 'step': 8971, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:50.730600', 'step': 8971, 'epoch': 2} {'type': 'loss', 'content': 0.11058115214109421, 'timestamp': '2025-10-01 04:23:50.754162', 'step': 8972, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:50.784809', 'step': 8972, 'epoch': 2} {'type': 'loss', 'content': 0.10411589592695236, 'timestamp': '2025-10-01 04:23:50.786874', 'step': 8973, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:50.817296', 'step': 8973, 'epoch': 2} {'type': 'loss', 'content': 0.21059738099575043, 'timestamp': '2025-10-01 04:23:50.819382', 'step': 8974, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:23:50.850002', 'step': 8974, 'epoch': 2} {'type': 'loss', 'content': 0.0714714527130127, 'timestamp': '2025-10-01 04:23:50.852176', 'step': 8975, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:50.883042', 'step': 8975, 'epoch': 2} {'type': 'loss', 'content': 0.15826007723808289, 'timestamp': '2025-10-01 04:23:50.906443', 'step': 8976, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:50.937628', 'step': 8976, 'epoch': 2} {'type': 'loss', 'content': 0.06564643234014511, 'timestamp': '2025-10-01 04:23:50.939836', 'step': 8977, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:50.970659', 'step': 8977, 'epoch': 2} {'type': 'loss', 'content': 0.1271742582321167, 'timestamp': '2025-10-01 04:23:50.972788', 'step': 8978, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:23:51.004195', 'step': 8978, 'epoch': 2} {'type': 'loss', 'content': 0.2381831556558609, 'timestamp': '2025-10-01 04:23:51.006627', 'step': 8979, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:51.037255', 'step': 8979, 'epoch': 2} {'type': 'loss', 'content': 0.0654611811041832, 'timestamp': '2025-10-01 04:23:51.060769', 'step': 8980, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:51.094371', 'step': 8980, 'epoch': 2} {'type': 'loss', 'content': 0.027938874438405037, 'timestamp': '2025-10-01 04:23:51.096444', 'step': 8981, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:23:51.126837', 'step': 8981, 'epoch': 2} {'type': 'loss', 'content': 0.09996950626373291, 'timestamp': '2025-10-01 04:23:51.129124', 'step': 8982, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:23:51.166954', 'step': 8982, 'epoch': 2} {'type': 'loss', 'content': 0.09702509641647339, 'timestamp': '2025-10-01 04:23:51.169693', 'step': 8983, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:23:51.201808', 'step': 8983, 'epoch': 2} {'type': 'loss', 'content': 0.11759143322706223, 'timestamp': '2025-10-01 04:23:51.226103', 'step': 8984, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:51.256753', 'step': 8984, 'epoch': 2} {'type': 'loss', 'content': 0.1555892527103424, 'timestamp': '2025-10-01 04:23:51.259151', 'step': 8985, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:23:51.293182', 'step': 8985, 'epoch': 2} {'type': 'loss', 'content': 0.14593444764614105, 'timestamp': '2025-10-01 04:23:51.295338', 'step': 8986, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:51.332520', 'step': 8986, 'epoch': 2} {'type': 'loss', 'content': 0.11862227320671082, 'timestamp': '2025-10-01 04:23:51.336505', 'step': 8987, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:23:51.371258', 'step': 8987, 'epoch': 2} {'type': 'loss', 'content': 0.09263288229703903, 'timestamp': '2025-10-01 04:23:51.403602', 'step': 8988, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:51.434185', 'step': 8988, 'epoch': 2} {'type': 'loss', 'content': 0.08185911923646927, 'timestamp': '2025-10-01 04:23:51.436846', 'step': 8989, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:51.471243', 'step': 8989, 'epoch': 2} {'type': 'loss', 'content': 0.06675451248884201, 'timestamp': '2025-10-01 04:23:51.473428', 'step': 8990, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:51.504045', 'step': 8990, 'epoch': 2} {'type': 'loss', 'content': 0.10983333736658096, 'timestamp': '2025-10-01 04:23:51.506481', 'step': 8991, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:51.540149', 'step': 8991, 'epoch': 2} {'type': 'loss', 'content': 0.0734836533665657, 'timestamp': '2025-10-01 04:23:51.569544', 'step': 8992, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:23:51.600586', 'step': 8992, 'epoch': 2} {'type': 'loss', 'content': 0.09983839094638824, 'timestamp': '2025-10-01 04:23:51.603525', 'step': 8993, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:51.634452', 'step': 8993, 'epoch': 2} {'type': 'loss', 'content': 0.06619620323181152, 'timestamp': '2025-10-01 04:23:51.637020', 'step': 8994, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:51.677355', 'step': 8994, 'epoch': 2} {'type': 'loss', 'content': 0.15826359391212463, 'timestamp': '2025-10-01 04:23:51.679422', 'step': 8995, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:23:51.710314', 'step': 8995, 'epoch': 2} {'type': 'loss', 'content': 0.05177300423383713, 'timestamp': '2025-10-01 04:23:51.733971', 'step': 8996, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:51.768205', 'step': 8996, 'epoch': 2} {'type': 'loss', 'content': 0.06346496939659119, 'timestamp': '2025-10-01 04:23:51.770098', 'step': 8997, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:23:51.801281', 'step': 8997, 'epoch': 2} {'type': 'loss', 'content': 0.21264414489269257, 'timestamp': '2025-10-01 04:23:51.805083', 'step': 8998, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:51.836061', 'step': 8998, 'epoch': 2} {'type': 'loss', 'content': 0.09704133868217468, 'timestamp': '2025-10-01 04:23:51.838762', 'step': 8999, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:23:51.869054', 'step': 8999, 'epoch': 2} {'type': 'loss', 'content': 0.1038002148270607, 'timestamp': '2025-10-01 04:23:51.893107', 'step': 9000, 'epoch': 2} {'type': 'info', 'content': 'Checkpoint saved at step 9000', 'timestamp': '2025-10-01 04:23:57.378297', 'step': 9000, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:57.412378', 'step': 9000, 'epoch': 2} {'type': 'loss', 'content': 0.17565670609474182, 'timestamp': '2025-10-01 04:23:57.414474', 'step': 9001, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:23:57.447407', 'step': 9001, 'epoch': 2} {'type': 'loss', 'content': 0.11658243089914322, 'timestamp': '2025-10-01 04:23:57.449644', 'step': 9002, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:57.496829', 'step': 9002, 'epoch': 2} {'type': 'loss', 'content': 0.07450448721647263, 'timestamp': '2025-10-01 04:23:57.498953', 'step': 9003, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:57.544996', 'step': 9003, 'epoch': 2} {'type': 'loss', 'content': 0.0894489735364914, 'timestamp': '2025-10-01 04:23:57.568711', 'step': 9004, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:57.604749', 'step': 9004, 'epoch': 2} {'type': 'loss', 'content': 0.20902878046035767, 'timestamp': '2025-10-01 04:23:57.618815', 'step': 9005, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:57.658006', 'step': 9005, 'epoch': 2} {'type': 'loss', 'content': 0.04955531284213066, 'timestamp': '2025-10-01 04:23:57.660337', 'step': 9006, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:57.702872', 'step': 9006, 'epoch': 2} {'type': 'loss', 'content': 0.08278980851173401, 'timestamp': '2025-10-01 04:23:57.705054', 'step': 9007, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:23:57.738392', 'step': 9007, 'epoch': 2} {'type': 'loss', 'content': 0.10295188426971436, 'timestamp': '2025-10-01 04:23:57.762424', 'step': 9008, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:57.796060', 'step': 9008, 'epoch': 2} {'type': 'loss', 'content': 0.14557236433029175, 'timestamp': '2025-10-01 04:23:57.798175', 'step': 9009, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:57.829918', 'step': 9009, 'epoch': 2} {'type': 'loss', 'content': 0.08429577201604843, 'timestamp': '2025-10-01 04:23:57.832091', 'step': 9010, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:23:57.866578', 'step': 9010, 'epoch': 2} {'type': 'loss', 'content': 0.09979969263076782, 'timestamp': '2025-10-01 04:23:57.868816', 'step': 9011, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:57.900702', 'step': 9011, 'epoch': 2} {'type': 'loss', 'content': 0.028951318934559822, 'timestamp': '2025-10-01 04:23:57.924476', 'step': 9012, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:57.962689', 'step': 9012, 'epoch': 2} {'type': 'loss', 'content': 0.13335569202899933, 'timestamp': '2025-10-01 04:23:57.965152', 'step': 9013, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:58.004037', 'step': 9013, 'epoch': 2} {'type': 'loss', 'content': 0.12482884526252747, 'timestamp': '2025-10-01 04:23:58.006754', 'step': 9014, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:58.039432', 'step': 9014, 'epoch': 2} {'type': 'loss', 'content': 0.1735832244157791, 'timestamp': '2025-10-01 04:23:58.041559', 'step': 9015, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:58.074686', 'step': 9015, 'epoch': 2} {'type': 'loss', 'content': 0.14427964389324188, 'timestamp': '2025-10-01 04:23:58.098626', 'step': 9016, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:58.135088', 'step': 9016, 'epoch': 2} {'type': 'loss', 'content': 0.09199538826942444, 'timestamp': '2025-10-01 04:23:58.137235', 'step': 9017, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:58.170114', 'step': 9017, 'epoch': 2} {'type': 'loss', 'content': 0.06266956031322479, 'timestamp': '2025-10-01 04:23:58.172240', 'step': 9018, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:23:58.211276', 'step': 9018, 'epoch': 2} {'type': 'loss', 'content': 0.12271769344806671, 'timestamp': '2025-10-01 04:23:58.213598', 'step': 9019, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:58.250206', 'step': 9019, 'epoch': 2} {'type': 'loss', 'content': 0.12879875302314758, 'timestamp': '2025-10-01 04:23:58.273932', 'step': 9020, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:58.306036', 'step': 9020, 'epoch': 2} {'type': 'loss', 'content': 0.09046468883752823, 'timestamp': '2025-10-01 04:23:58.308119', 'step': 9021, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:58.340095', 'step': 9021, 'epoch': 2} {'type': 'loss', 'content': 0.05662700906395912, 'timestamp': '2025-10-01 04:23:58.342370', 'step': 9022, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:23:58.377530', 'step': 9022, 'epoch': 2} {'type': 'loss', 'content': 0.0934288427233696, 'timestamp': '2025-10-01 04:23:58.379800', 'step': 9023, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:58.416838', 'step': 9023, 'epoch': 2} {'type': 'loss', 'content': 0.07215335965156555, 'timestamp': '2025-10-01 04:23:58.440519', 'step': 9024, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:58.483339', 'step': 9024, 'epoch': 2} {'type': 'loss', 'content': 0.0861344188451767, 'timestamp': '2025-10-01 04:23:58.485476', 'step': 9025, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:23:58.517867', 'step': 9025, 'epoch': 2} {'type': 'loss', 'content': 0.1757594645023346, 'timestamp': '2025-10-01 04:23:58.520009', 'step': 9026, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:23:58.554595', 'step': 9026, 'epoch': 2} {'type': 'loss', 'content': 0.05596724897623062, 'timestamp': '2025-10-01 04:23:58.556904', 'step': 9027, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:58.594614', 'step': 9027, 'epoch': 2} {'type': 'loss', 'content': 0.10164102911949158, 'timestamp': '2025-10-01 04:23:58.618388', 'step': 9028, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:23:58.652051', 'step': 9028, 'epoch': 2} {'type': 'loss', 'content': 0.12706370651721954, 'timestamp': '2025-10-01 04:23:58.654308', 'step': 9029, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:58.702337', 'step': 9029, 'epoch': 2} {'type': 'loss', 'content': 0.15547607839107513, 'timestamp': '2025-10-01 04:23:58.704911', 'step': 9030, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:23:58.743209', 'step': 9030, 'epoch': 2} {'type': 'loss', 'content': 0.10111110657453537, 'timestamp': '2025-10-01 04:23:58.745542', 'step': 9031, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:58.782831', 'step': 9031, 'epoch': 2} {'type': 'loss', 'content': 0.22445206344127655, 'timestamp': '2025-10-01 04:23:58.807003', 'step': 9032, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:23:58.845173', 'step': 9032, 'epoch': 2} {'type': 'loss', 'content': 0.09090282768011093, 'timestamp': '2025-10-01 04:23:58.847412', 'step': 9033, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:23:58.878610', 'step': 9033, 'epoch': 2} {'type': 'loss', 'content': 0.07957765460014343, 'timestamp': '2025-10-01 04:23:58.882882', 'step': 9034, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:58.914172', 'step': 9034, 'epoch': 2} {'type': 'loss', 'content': 0.12437514960765839, 'timestamp': '2025-10-01 04:23:58.916531', 'step': 9035, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:58.954975', 'step': 9035, 'epoch': 2} {'type': 'loss', 'content': 0.1452094167470932, 'timestamp': '2025-10-01 04:23:58.981316', 'step': 9036, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:59.018908', 'step': 9036, 'epoch': 2} {'type': 'loss', 'content': 0.13752660155296326, 'timestamp': '2025-10-01 04:23:59.021111', 'step': 9037, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:59.057898', 'step': 9037, 'epoch': 2} {'type': 'loss', 'content': 0.11261703073978424, 'timestamp': '2025-10-01 04:23:59.060000', 'step': 9038, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:59.096234', 'step': 9038, 'epoch': 2} {'type': 'loss', 'content': 0.12643523514270782, 'timestamp': '2025-10-01 04:23:59.098431', 'step': 9039, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:59.129498', 'step': 9039, 'epoch': 2} {'type': 'loss', 'content': 0.10234076529741287, 'timestamp': '2025-10-01 04:23:59.153061', 'step': 9040, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:23:59.185304', 'step': 9040, 'epoch': 2} {'type': 'loss', 'content': 0.08875271677970886, 'timestamp': '2025-10-01 04:23:59.187547', 'step': 9041, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:23:59.222962', 'step': 9041, 'epoch': 2} {'type': 'loss', 'content': 0.17010776698589325, 'timestamp': '2025-10-01 04:23:59.225271', 'step': 9042, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:23:59.257527', 'step': 9042, 'epoch': 2} {'type': 'loss', 'content': 0.14965245127677917, 'timestamp': '2025-10-01 04:23:59.261509', 'step': 9043, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:23:59.293082', 'step': 9043, 'epoch': 2} {'type': 'loss', 'content': 0.07748444378376007, 'timestamp': '2025-10-01 04:23:59.316784', 'step': 9044, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:59.354772', 'step': 9044, 'epoch': 2} {'type': 'loss', 'content': 0.133500337600708, 'timestamp': '2025-10-01 04:23:59.356736', 'step': 9045, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:59.393928', 'step': 9045, 'epoch': 2} {'type': 'loss', 'content': 0.1228242814540863, 'timestamp': '2025-10-01 04:23:59.395996', 'step': 9046, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:59.429496', 'step': 9046, 'epoch': 2} {'type': 'loss', 'content': 0.14081627130508423, 'timestamp': '2025-10-01 04:23:59.431592', 'step': 9047, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:23:59.470139', 'step': 9047, 'epoch': 2} {'type': 'loss', 'content': 0.07954811304807663, 'timestamp': '2025-10-01 04:23:59.493877', 'step': 9048, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:59.526831', 'step': 9048, 'epoch': 2} {'type': 'loss', 'content': 0.09367377310991287, 'timestamp': '2025-10-01 04:23:59.529065', 'step': 9049, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:23:59.561458', 'step': 9049, 'epoch': 2} {'type': 'loss', 'content': 0.17442946135997772, 'timestamp': '2025-10-01 04:23:59.563550', 'step': 9050, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:23:59.600452', 'step': 9050, 'epoch': 2} {'type': 'loss', 'content': 0.11379627138376236, 'timestamp': '2025-10-01 04:23:59.602620', 'step': 9051, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:59.634585', 'step': 9051, 'epoch': 2} {'type': 'loss', 'content': 0.17617470026016235, 'timestamp': '2025-10-01 04:23:59.659520', 'step': 9052, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:59.697704', 'step': 9052, 'epoch': 2} {'type': 'loss', 'content': 0.10895762592554092, 'timestamp': '2025-10-01 04:23:59.699823', 'step': 9053, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:59.732127', 'step': 9053, 'epoch': 2} {'type': 'loss', 'content': 0.17807745933532715, 'timestamp': '2025-10-01 04:23:59.734181', 'step': 9054, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:23:59.773697', 'step': 9054, 'epoch': 2} {'type': 'loss', 'content': 0.08665461093187332, 'timestamp': '2025-10-01 04:23:59.775916', 'step': 9055, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:59.807518', 'step': 9055, 'epoch': 2} {'type': 'loss', 'content': 0.121172696352005, 'timestamp': '2025-10-01 04:23:59.831728', 'step': 9056, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:59.874727', 'step': 9056, 'epoch': 2} {'type': 'loss', 'content': 0.16507521271705627, 'timestamp': '2025-10-01 04:23:59.876873', 'step': 9057, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:59.908463', 'step': 9057, 'epoch': 2} {'type': 'loss', 'content': 0.0797373428940773, 'timestamp': '2025-10-01 04:23:59.912076', 'step': 9058, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:23:59.943296', 'step': 9058, 'epoch': 2} {'type': 'loss', 'content': 0.15952274203300476, 'timestamp': '2025-10-01 04:23:59.945596', 'step': 9059, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:23:59.981807', 'step': 9059, 'epoch': 2} {'type': 'loss', 'content': 0.1466698944568634, 'timestamp': '2025-10-01 04:24:00.005701', 'step': 9060, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:00.038306', 'step': 9060, 'epoch': 2} {'type': 'loss', 'content': 0.0925530195236206, 'timestamp': '2025-10-01 04:24:00.040370', 'step': 9061, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:00.077907', 'step': 9061, 'epoch': 2} {'type': 'loss', 'content': 0.10763443261384964, 'timestamp': '2025-10-01 04:24:00.080207', 'step': 9062, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:24:00.112744', 'step': 9062, 'epoch': 2} {'type': 'loss', 'content': 0.09996942430734634, 'timestamp': '2025-10-01 04:24:00.117067', 'step': 9063, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:00.150047', 'step': 9063, 'epoch': 2} {'type': 'loss', 'content': 0.11715812981128693, 'timestamp': '2025-10-01 04:24:00.173947', 'step': 9064, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:00.225765', 'step': 9064, 'epoch': 2} {'type': 'loss', 'content': 0.07883653044700623, 'timestamp': '2025-10-01 04:24:00.228009', 'step': 9065, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:00.271604', 'step': 9065, 'epoch': 2} {'type': 'loss', 'content': 0.10762228071689606, 'timestamp': '2025-10-01 04:24:00.274142', 'step': 9066, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:24:00.314178', 'step': 9066, 'epoch': 2} {'type': 'loss', 'content': 0.05855914205312729, 'timestamp': '2025-10-01 04:24:00.316539', 'step': 9067, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:00.366862', 'step': 9067, 'epoch': 2} {'type': 'loss', 'content': 0.15776745975017548, 'timestamp': '2025-10-01 04:24:00.391079', 'step': 9068, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:00.423454', 'step': 9068, 'epoch': 2} {'type': 'loss', 'content': 0.11159289628267288, 'timestamp': '2025-10-01 04:24:00.427677', 'step': 9069, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:00.474525', 'step': 9069, 'epoch': 2} {'type': 'loss', 'content': 0.09546072781085968, 'timestamp': '2025-10-01 04:24:00.476521', 'step': 9070, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:00.540342', 'step': 9070, 'epoch': 2} {'type': 'loss', 'content': 0.14203666150569916, 'timestamp': '2025-10-01 04:24:00.545346', 'step': 9071, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:00.590859', 'step': 9071, 'epoch': 2} {'type': 'loss', 'content': 0.08632621169090271, 'timestamp': '2025-10-01 04:24:00.617592', 'step': 9072, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:00.679414', 'step': 9072, 'epoch': 2} {'type': 'loss', 'content': 0.09527979791164398, 'timestamp': '2025-10-01 04:24:00.681678', 'step': 9073, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:00.732688', 'step': 9073, 'epoch': 2} {'type': 'loss', 'content': 0.07874315977096558, 'timestamp': '2025-10-01 04:24:00.734800', 'step': 9074, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:00.778795', 'step': 9074, 'epoch': 2} {'type': 'loss', 'content': 0.2564515173435211, 'timestamp': '2025-10-01 04:24:00.781179', 'step': 9075, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:00.829819', 'step': 9075, 'epoch': 2} {'type': 'loss', 'content': 0.09138540923595428, 'timestamp': '2025-10-01 04:24:00.864928', 'step': 9076, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:00.909048', 'step': 9076, 'epoch': 2} {'type': 'loss', 'content': 0.13261890411376953, 'timestamp': '2025-10-01 04:24:00.911746', 'step': 9077, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:00.968940', 'step': 9077, 'epoch': 2} {'type': 'loss', 'content': 0.11960690468549728, 'timestamp': '2025-10-01 04:24:00.970972', 'step': 9078, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:01.015232', 'step': 9078, 'epoch': 2} {'type': 'loss', 'content': 0.08667067438364029, 'timestamp': '2025-10-01 04:24:01.017444', 'step': 9079, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:01.055737', 'step': 9079, 'epoch': 2} {'type': 'loss', 'content': 0.1010289192199707, 'timestamp': '2025-10-01 04:24:01.079482', 'step': 9080, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:01.114814', 'step': 9080, 'epoch': 2} {'type': 'loss', 'content': 0.17082688212394714, 'timestamp': '2025-10-01 04:24:01.116862', 'step': 9081, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:01.174724', 'step': 9081, 'epoch': 2} {'type': 'loss', 'content': 0.11423228681087494, 'timestamp': '2025-10-01 04:24:01.176952', 'step': 9082, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:24:01.227711', 'step': 9082, 'epoch': 2} {'type': 'loss', 'content': 0.1464153528213501, 'timestamp': '2025-10-01 04:24:01.230491', 'step': 9083, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:01.267687', 'step': 9083, 'epoch': 2} {'type': 'loss', 'content': 0.1722312569618225, 'timestamp': '2025-10-01 04:24:01.291397', 'step': 9084, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:01.346219', 'step': 9084, 'epoch': 2} {'type': 'loss', 'content': 0.07396863400936127, 'timestamp': '2025-10-01 04:24:01.348882', 'step': 9085, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:01.395694', 'step': 9085, 'epoch': 2} {'type': 'loss', 'content': 0.09324140101671219, 'timestamp': '2025-10-01 04:24:01.399197', 'step': 9086, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:24:01.443721', 'step': 9086, 'epoch': 2} {'type': 'loss', 'content': 0.113084577023983, 'timestamp': '2025-10-01 04:24:01.446216', 'step': 9087, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:01.477802', 'step': 9087, 'epoch': 2} {'type': 'loss', 'content': 0.08576688170433044, 'timestamp': '2025-10-01 04:24:01.501702', 'step': 9088, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:01.553497', 'step': 9088, 'epoch': 2} {'type': 'loss', 'content': 0.08882357180118561, 'timestamp': '2025-10-01 04:24:01.556051', 'step': 9089, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:24:01.590045', 'step': 9089, 'epoch': 2} {'type': 'loss', 'content': 0.15211908519268036, 'timestamp': '2025-10-01 04:24:01.592308', 'step': 9090, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:01.626986', 'step': 9090, 'epoch': 2} {'type': 'loss', 'content': 0.057494767010211945, 'timestamp': '2025-10-01 04:24:01.629124', 'step': 9091, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:24:01.663238', 'step': 9091, 'epoch': 2} {'type': 'loss', 'content': 0.1060541570186615, 'timestamp': '2025-10-01 04:24:01.686947', 'step': 9092, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:24:01.718451', 'step': 9092, 'epoch': 2} {'type': 'loss', 'content': 0.11374165862798691, 'timestamp': '2025-10-01 04:24:01.720442', 'step': 9093, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:01.754923', 'step': 9093, 'epoch': 2} {'type': 'loss', 'content': 0.16326314210891724, 'timestamp': '2025-10-01 04:24:01.757163', 'step': 9094, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:01.790354', 'step': 9094, 'epoch': 2} {'type': 'loss', 'content': 0.10328130424022675, 'timestamp': '2025-10-01 04:24:01.804267', 'step': 9095, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:01.837801', 'step': 9095, 'epoch': 2} {'type': 'loss', 'content': 0.09729810804128647, 'timestamp': '2025-10-01 04:24:01.861560', 'step': 9096, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:01.895267', 'step': 9096, 'epoch': 2} {'type': 'loss', 'content': 0.16666726768016815, 'timestamp': '2025-10-01 04:24:01.897824', 'step': 9097, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:01.930736', 'step': 9097, 'epoch': 2} {'type': 'loss', 'content': 0.22868508100509644, 'timestamp': '2025-10-01 04:24:01.933123', 'step': 9098, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:01.969715', 'step': 9098, 'epoch': 2} {'type': 'loss', 'content': 0.1982744336128235, 'timestamp': '2025-10-01 04:24:01.972911', 'step': 9099, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:02.010426', 'step': 9099, 'epoch': 2} {'type': 'loss', 'content': 0.136659637093544, 'timestamp': '2025-10-01 04:24:02.034261', 'step': 9100, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:02.069363', 'step': 9100, 'epoch': 2} {'type': 'loss', 'content': 0.17124813795089722, 'timestamp': '2025-10-01 04:24:02.071723', 'step': 9101, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:02.105615', 'step': 9101, 'epoch': 2} {'type': 'loss', 'content': 0.08652752637863159, 'timestamp': '2025-10-01 04:24:02.120618', 'step': 9102, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:02.158605', 'step': 9102, 'epoch': 2} {'type': 'loss', 'content': 0.11661908030509949, 'timestamp': '2025-10-01 04:24:02.160892', 'step': 9103, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:02.194700', 'step': 9103, 'epoch': 2} {'type': 'loss', 'content': 0.18139347434043884, 'timestamp': '2025-10-01 04:24:02.219033', 'step': 9104, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:02.252212', 'step': 9104, 'epoch': 2} {'type': 'loss', 'content': 0.09669254720211029, 'timestamp': '2025-10-01 04:24:02.255006', 'step': 9105, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:02.288387', 'step': 9105, 'epoch': 2} {'type': 'loss', 'content': 0.0573311410844326, 'timestamp': '2025-10-01 04:24:02.290628', 'step': 9106, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:02.326302', 'step': 9106, 'epoch': 2} {'type': 'loss', 'content': 0.06933499872684479, 'timestamp': '2025-10-01 04:24:02.328593', 'step': 9107, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:24:02.361533', 'step': 9107, 'epoch': 2} {'type': 'loss', 'content': 0.09468597173690796, 'timestamp': '2025-10-01 04:24:02.386695', 'step': 9108, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:02.419398', 'step': 9108, 'epoch': 2} {'type': 'loss', 'content': 0.11966461688280106, 'timestamp': '2025-10-01 04:24:02.421439', 'step': 9109, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:02.453594', 'step': 9109, 'epoch': 2} {'type': 'loss', 'content': 0.146241694688797, 'timestamp': '2025-10-01 04:24:02.455815', 'step': 9110, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:02.487466', 'step': 9110, 'epoch': 2} {'type': 'loss', 'content': 0.12525542080402374, 'timestamp': '2025-10-01 04:24:02.489614', 'step': 9111, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:02.523109', 'step': 9111, 'epoch': 2} {'type': 'loss', 'content': 0.0589694008231163, 'timestamp': '2025-10-01 04:24:02.546823', 'step': 9112, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:02.603701', 'step': 9112, 'epoch': 2} {'type': 'loss', 'content': 0.14598368108272552, 'timestamp': '2025-10-01 04:24:02.605913', 'step': 9113, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:02.636886', 'step': 9113, 'epoch': 2} {'type': 'loss', 'content': 0.07529761642217636, 'timestamp': '2025-10-01 04:24:02.639277', 'step': 9114, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:02.672323', 'step': 9114, 'epoch': 2} {'type': 'loss', 'content': 0.1626923829317093, 'timestamp': '2025-10-01 04:24:02.674494', 'step': 9115, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:02.711901', 'step': 9115, 'epoch': 2} {'type': 'loss', 'content': 0.11628047376871109, 'timestamp': '2025-10-01 04:24:02.735741', 'step': 9116, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:02.774397', 'step': 9116, 'epoch': 2} {'type': 'loss', 'content': 0.06824036687612534, 'timestamp': '2025-10-01 04:24:02.776381', 'step': 9117, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:02.808085', 'step': 9117, 'epoch': 2} {'type': 'loss', 'content': 0.0795087069272995, 'timestamp': '2025-10-01 04:24:02.810170', 'step': 9118, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:02.846500', 'step': 9118, 'epoch': 2} {'type': 'loss', 'content': 0.12524540722370148, 'timestamp': '2025-10-01 04:24:02.849101', 'step': 9119, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:02.886337', 'step': 9119, 'epoch': 2} {'type': 'loss', 'content': 0.1700287014245987, 'timestamp': '2025-10-01 04:24:02.910152', 'step': 9120, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:24:02.946211', 'step': 9120, 'epoch': 2} {'type': 'loss', 'content': 0.05834966525435448, 'timestamp': '2025-10-01 04:24:02.948501', 'step': 9121, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:24:02.980862', 'step': 9121, 'epoch': 2} {'type': 'loss', 'content': 0.12545768916606903, 'timestamp': '2025-10-01 04:24:02.983483', 'step': 9122, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:03.018006', 'step': 9122, 'epoch': 2} {'type': 'loss', 'content': 0.07939504086971283, 'timestamp': '2025-10-01 04:24:03.020513', 'step': 9123, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:24:03.053909', 'step': 9123, 'epoch': 2} {'type': 'loss', 'content': 0.09909270703792572, 'timestamp': '2025-10-01 04:24:03.078581', 'step': 9124, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:03.122146', 'step': 9124, 'epoch': 2} {'type': 'loss', 'content': 0.089059978723526, 'timestamp': '2025-10-01 04:24:03.124502', 'step': 9125, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:03.161056', 'step': 9125, 'epoch': 2} {'type': 'loss', 'content': 0.09579068422317505, 'timestamp': '2025-10-01 04:24:03.163731', 'step': 9126, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:03.195951', 'step': 9126, 'epoch': 2} {'type': 'loss', 'content': 0.1490800380706787, 'timestamp': '2025-10-01 04:24:03.198749', 'step': 9127, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:03.233814', 'step': 9127, 'epoch': 2} {'type': 'loss', 'content': 0.16411170363426208, 'timestamp': '2025-10-01 04:24:03.270052', 'step': 9128, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:03.301865', 'step': 9128, 'epoch': 2} {'type': 'loss', 'content': 0.04847244173288345, 'timestamp': '2025-10-01 04:24:03.304730', 'step': 9129, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:03.336512', 'step': 9129, 'epoch': 2} {'type': 'loss', 'content': 0.121198371052742, 'timestamp': '2025-10-01 04:24:03.339248', 'step': 9130, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:03.372444', 'step': 9130, 'epoch': 2} {'type': 'loss', 'content': 0.13135260343551636, 'timestamp': '2025-10-01 04:24:03.374627', 'step': 9131, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:24:03.411188', 'step': 9131, 'epoch': 2} {'type': 'loss', 'content': 0.12092642486095428, 'timestamp': '2025-10-01 04:24:03.435423', 'step': 9132, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:24:03.468690', 'step': 9132, 'epoch': 2} {'type': 'loss', 'content': 0.08668027073144913, 'timestamp': '2025-10-01 04:24:03.471482', 'step': 9133, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:03.510445', 'step': 9133, 'epoch': 2} {'type': 'loss', 'content': 0.11143545061349869, 'timestamp': '2025-10-01 04:24:03.514062', 'step': 9134, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:03.546997', 'step': 9134, 'epoch': 2} {'type': 'loss', 'content': 0.1663307249546051, 'timestamp': '2025-10-01 04:24:03.549187', 'step': 9135, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:03.586348', 'step': 9135, 'epoch': 2} {'type': 'loss', 'content': 0.10714282840490341, 'timestamp': '2025-10-01 04:24:03.610670', 'step': 9136, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:03.644772', 'step': 9136, 'epoch': 2} {'type': 'loss', 'content': 0.0793561339378357, 'timestamp': '2025-10-01 04:24:03.647077', 'step': 9137, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:03.705969', 'step': 9137, 'epoch': 2} {'type': 'loss', 'content': 0.12886463105678558, 'timestamp': '2025-10-01 04:24:03.708443', 'step': 9138, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-10-01 04:24:03.743425', 'step': 9138, 'epoch': 2} {'type': 'loss', 'content': 0.09749282151460648, 'timestamp': '2025-10-01 04:24:03.750480', 'step': 9139, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:03.785059', 'step': 9139, 'epoch': 2} {'type': 'loss', 'content': 0.1508709341287613, 'timestamp': '2025-10-01 04:24:03.809117', 'step': 9140, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:03.844810', 'step': 9140, 'epoch': 2} {'type': 'loss', 'content': 0.10855388641357422, 'timestamp': '2025-10-01 04:24:03.847158', 'step': 9141, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:03.888879', 'step': 9141, 'epoch': 2} {'type': 'loss', 'content': 0.06466546654701233, 'timestamp': '2025-10-01 04:24:03.890921', 'step': 9142, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:24:03.928616', 'step': 9142, 'epoch': 2} {'type': 'loss', 'content': 0.08788774907588959, 'timestamp': '2025-10-01 04:24:03.931147', 'step': 9143, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:03.966036', 'step': 9143, 'epoch': 2} {'type': 'loss', 'content': 0.11341697722673416, 'timestamp': '2025-10-01 04:24:03.990306', 'step': 9144, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:04.023199', 'step': 9144, 'epoch': 2} {'type': 'loss', 'content': 0.20199337601661682, 'timestamp': '2025-10-01 04:24:04.030526', 'step': 9145, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:04.064322', 'step': 9145, 'epoch': 2} {'type': 'loss', 'content': 0.060361750423908234, 'timestamp': '2025-10-01 04:24:04.066788', 'step': 9146, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:04.100402', 'step': 9146, 'epoch': 2} {'type': 'loss', 'content': 0.1980922818183899, 'timestamp': '2025-10-01 04:24:04.102558', 'step': 9147, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:04.135433', 'step': 9147, 'epoch': 2} {'type': 'loss', 'content': 0.04610352963209152, 'timestamp': '2025-10-01 04:24:04.159277', 'step': 9148, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:04.202322', 'step': 9148, 'epoch': 2} {'type': 'loss', 'content': 0.06082036718726158, 'timestamp': '2025-10-01 04:24:04.204444', 'step': 9149, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:04.250979', 'step': 9149, 'epoch': 2} {'type': 'loss', 'content': 0.15853218734264374, 'timestamp': '2025-10-01 04:24:04.253424', 'step': 9150, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:24:04.285014', 'step': 9150, 'epoch': 2} {'type': 'loss', 'content': 0.2051645815372467, 'timestamp': '2025-10-01 04:24:04.287244', 'step': 9151, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:04.328238', 'step': 9151, 'epoch': 2} {'type': 'loss', 'content': 0.0980597734451294, 'timestamp': '2025-10-01 04:24:04.352427', 'step': 9152, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:24:04.388851', 'step': 9152, 'epoch': 2} {'type': 'loss', 'content': 0.17318527400493622, 'timestamp': '2025-10-01 04:24:04.391027', 'step': 9153, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:04.423950', 'step': 9153, 'epoch': 2} {'type': 'loss', 'content': 0.0890580490231514, 'timestamp': '2025-10-01 04:24:04.426589', 'step': 9154, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:24:04.459571', 'step': 9154, 'epoch': 2} {'type': 'loss', 'content': 0.11720434576272964, 'timestamp': '2025-10-01 04:24:04.462457', 'step': 9155, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:04.496055', 'step': 9155, 'epoch': 2} {'type': 'loss', 'content': 0.07642988115549088, 'timestamp': '2025-10-01 04:24:04.523671', 'step': 9156, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:24:04.558850', 'step': 9156, 'epoch': 2} {'type': 'loss', 'content': 0.11827994138002396, 'timestamp': '2025-10-01 04:24:04.561059', 'step': 9157, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:04.594144', 'step': 9157, 'epoch': 2} {'type': 'loss', 'content': 0.08925803750753403, 'timestamp': '2025-10-01 04:24:04.596536', 'step': 9158, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:24:04.639310', 'step': 9158, 'epoch': 2} {'type': 'loss', 'content': 0.059112124145030975, 'timestamp': '2025-10-01 04:24:04.641644', 'step': 9159, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:24:04.677405', 'step': 9159, 'epoch': 2} {'type': 'loss', 'content': 0.17346633970737457, 'timestamp': '2025-10-01 04:24:04.701942', 'step': 9160, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:24:04.738304', 'step': 9160, 'epoch': 2} {'type': 'loss', 'content': 0.05883362889289856, 'timestamp': '2025-10-01 04:24:04.757910', 'step': 9161, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:04.791535', 'step': 9161, 'epoch': 2} {'type': 'loss', 'content': 0.08610286563634872, 'timestamp': '2025-10-01 04:24:04.793706', 'step': 9162, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:04.829686', 'step': 9162, 'epoch': 2} {'type': 'loss', 'content': 0.06257558614015579, 'timestamp': '2025-10-01 04:24:04.834108', 'step': 9163, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:04.871881', 'step': 9163, 'epoch': 2} {'type': 'loss', 'content': 0.0741758942604065, 'timestamp': '2025-10-01 04:24:04.895582', 'step': 9164, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:04.935005', 'step': 9164, 'epoch': 2} {'type': 'loss', 'content': 0.2256210744380951, 'timestamp': '2025-10-01 04:24:04.937122', 'step': 9165, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:04.970423', 'step': 9165, 'epoch': 2} {'type': 'loss', 'content': 0.13640713691711426, 'timestamp': '2025-10-01 04:24:04.972426', 'step': 9166, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:24:05.008400', 'step': 9166, 'epoch': 2} {'type': 'loss', 'content': 0.0897228941321373, 'timestamp': '2025-10-01 04:24:05.011379', 'step': 9167, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:05.045171', 'step': 9167, 'epoch': 2} {'type': 'loss', 'content': 0.0977967232465744, 'timestamp': '2025-10-01 04:24:05.068993', 'step': 9168, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:24:05.102091', 'step': 9168, 'epoch': 2} {'type': 'loss', 'content': 0.10380012542009354, 'timestamp': '2025-10-01 04:24:05.104343', 'step': 9169, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:05.136848', 'step': 9169, 'epoch': 2} {'type': 'loss', 'content': 0.1170688271522522, 'timestamp': '2025-10-01 04:24:05.139926', 'step': 9170, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:24:05.172382', 'step': 9170, 'epoch': 2} {'type': 'loss', 'content': 0.09523726254701614, 'timestamp': '2025-10-01 04:24:05.176815', 'step': 9171, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:05.213030', 'step': 9171, 'epoch': 2} {'type': 'loss', 'content': 0.12812010943889618, 'timestamp': '2025-10-01 04:24:05.236650', 'step': 9172, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:05.274545', 'step': 9172, 'epoch': 2} {'type': 'loss', 'content': 0.05491698160767555, 'timestamp': '2025-10-01 04:24:05.278232', 'step': 9173, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:05.316937', 'step': 9173, 'epoch': 2} {'type': 'loss', 'content': 0.21378175914287567, 'timestamp': '2025-10-01 04:24:05.319987', 'step': 9174, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:05.361395', 'step': 9174, 'epoch': 2} {'type': 'loss', 'content': 0.23368661105632782, 'timestamp': '2025-10-01 04:24:05.364003', 'step': 9175, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:05.396614', 'step': 9175, 'epoch': 2} {'type': 'loss', 'content': 0.07252911478281021, 'timestamp': '2025-10-01 04:24:05.420424', 'step': 9176, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:05.452250', 'step': 9176, 'epoch': 2} {'type': 'loss', 'content': 0.05810463801026344, 'timestamp': '2025-10-01 04:24:05.454458', 'step': 9177, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:05.495693', 'step': 9177, 'epoch': 2} {'type': 'loss', 'content': 0.11199953407049179, 'timestamp': '2025-10-01 04:24:05.497816', 'step': 9178, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:05.530015', 'step': 9178, 'epoch': 2} {'type': 'loss', 'content': 0.25359973311424255, 'timestamp': '2025-10-01 04:24:05.532193', 'step': 9179, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:05.563025', 'step': 9179, 'epoch': 2} {'type': 'loss', 'content': 0.08414462208747864, 'timestamp': '2025-10-01 04:24:05.586711', 'step': 9180, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:05.626681', 'step': 9180, 'epoch': 2} {'type': 'loss', 'content': 0.06215870380401611, 'timestamp': '2025-10-01 04:24:05.628793', 'step': 9181, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:05.662253', 'step': 9181, 'epoch': 2} {'type': 'loss', 'content': 0.09569580852985382, 'timestamp': '2025-10-01 04:24:05.664403', 'step': 9182, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:24:05.695160', 'step': 9182, 'epoch': 2} {'type': 'loss', 'content': 0.14508375525474548, 'timestamp': '2025-10-01 04:24:05.697408', 'step': 9183, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:05.734979', 'step': 9183, 'epoch': 2} {'type': 'loss', 'content': 0.12456914782524109, 'timestamp': '2025-10-01 04:24:05.758722', 'step': 9184, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:24:05.789159', 'step': 9184, 'epoch': 2} {'type': 'loss', 'content': 0.07420848309993744, 'timestamp': '2025-10-01 04:24:05.791655', 'step': 9185, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:05.826868', 'step': 9185, 'epoch': 2} {'type': 'loss', 'content': 0.0799279436469078, 'timestamp': '2025-10-01 04:24:05.829863', 'step': 9186, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:05.886089', 'step': 9186, 'epoch': 2} {'type': 'loss', 'content': 0.1205352321267128, 'timestamp': '2025-10-01 04:24:05.889060', 'step': 9187, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:05.925442', 'step': 9187, 'epoch': 2} {'type': 'loss', 'content': 0.09211480617523193, 'timestamp': '2025-10-01 04:24:05.949311', 'step': 9188, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:05.986874', 'step': 9188, 'epoch': 2} {'type': 'loss', 'content': 0.17512308061122894, 'timestamp': '2025-10-01 04:24:05.990854', 'step': 9189, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:06.031793', 'step': 9189, 'epoch': 2} {'type': 'loss', 'content': 0.0943940207362175, 'timestamp': '2025-10-01 04:24:06.033930', 'step': 9190, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:06.066533', 'step': 9190, 'epoch': 2} {'type': 'loss', 'content': 0.05297189578413963, 'timestamp': '2025-10-01 04:24:06.071469', 'step': 9191, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:06.116850', 'step': 9191, 'epoch': 2} {'type': 'loss', 'content': 0.1401250958442688, 'timestamp': '2025-10-01 04:24:06.140468', 'step': 9192, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:24:06.175754', 'step': 9192, 'epoch': 2} {'type': 'loss', 'content': 0.19215083122253418, 'timestamp': '2025-10-01 04:24:06.177864', 'step': 9193, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:06.209748', 'step': 9193, 'epoch': 2} {'type': 'loss', 'content': 0.14419914782047272, 'timestamp': '2025-10-01 04:24:06.212077', 'step': 9194, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:06.253599', 'step': 9194, 'epoch': 2} {'type': 'loss', 'content': 0.12817606329917908, 'timestamp': '2025-10-01 04:24:06.255986', 'step': 9195, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:06.289297', 'step': 9195, 'epoch': 2} {'type': 'loss', 'content': 0.12196239084005356, 'timestamp': '2025-10-01 04:24:06.313048', 'step': 9196, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:06.345684', 'step': 9196, 'epoch': 2} {'type': 'loss', 'content': 0.06694898009300232, 'timestamp': '2025-10-01 04:24:06.347734', 'step': 9197, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:24:06.379706', 'step': 9197, 'epoch': 2} {'type': 'loss', 'content': 0.12260233610868454, 'timestamp': '2025-10-01 04:24:06.382902', 'step': 9198, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:06.414960', 'step': 9198, 'epoch': 2} {'type': 'loss', 'content': 0.1344904750585556, 'timestamp': '2025-10-01 04:24:06.417122', 'step': 9199, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:06.457466', 'step': 9199, 'epoch': 2} {'type': 'loss', 'content': 0.17020228505134583, 'timestamp': '2025-10-01 04:24:06.481577', 'step': 9200, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:06.523914', 'step': 9200, 'epoch': 2} {'type': 'loss', 'content': 0.07028680294752121, 'timestamp': '2025-10-01 04:24:06.536159', 'step': 9201, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:06.584588', 'step': 9201, 'epoch': 2} {'type': 'loss', 'content': 0.0897761806845665, 'timestamp': '2025-10-01 04:24:06.586892', 'step': 9202, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:06.626460', 'step': 9202, 'epoch': 2} {'type': 'loss', 'content': 0.1094607412815094, 'timestamp': '2025-10-01 04:24:06.628723', 'step': 9203, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:06.668593', 'step': 9203, 'epoch': 2} {'type': 'loss', 'content': 0.07088121771812439, 'timestamp': '2025-10-01 04:24:06.695292', 'step': 9204, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:06.738435', 'step': 9204, 'epoch': 2} {'type': 'loss', 'content': 0.1610954999923706, 'timestamp': '2025-10-01 04:24:06.744015', 'step': 9205, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:06.783343', 'step': 9205, 'epoch': 2} {'type': 'loss', 'content': 0.16126768290996552, 'timestamp': '2025-10-01 04:24:06.785551', 'step': 9206, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:06.832193', 'step': 9206, 'epoch': 2} {'type': 'loss', 'content': 0.08588646352291107, 'timestamp': '2025-10-01 04:24:06.834410', 'step': 9207, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:06.869048', 'step': 9207, 'epoch': 2} {'type': 'loss', 'content': 0.09250575304031372, 'timestamp': '2025-10-01 04:24:06.896495', 'step': 9208, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:06.942367', 'step': 9208, 'epoch': 2} {'type': 'loss', 'content': 0.08012837171554565, 'timestamp': '2025-10-01 04:24:06.944687', 'step': 9209, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:06.987522', 'step': 9209, 'epoch': 2} {'type': 'loss', 'content': 0.1256442666053772, 'timestamp': '2025-10-01 04:24:06.990873', 'step': 9210, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:07.029774', 'step': 9210, 'epoch': 2} {'type': 'loss', 'content': 0.1488323211669922, 'timestamp': '2025-10-01 04:24:07.031575', 'step': 9211, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:07.063999', 'step': 9211, 'epoch': 2} {'type': 'loss', 'content': 0.14229808747768402, 'timestamp': '2025-10-01 04:24:07.087662', 'step': 9212, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:07.130337', 'step': 9212, 'epoch': 2} {'type': 'loss', 'content': 0.13788779079914093, 'timestamp': '2025-10-01 04:24:07.133048', 'step': 9213, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:07.167134', 'step': 9213, 'epoch': 2} {'type': 'loss', 'content': 0.1411418318748474, 'timestamp': '2025-10-01 04:24:07.169550', 'step': 9214, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:07.201118', 'step': 9214, 'epoch': 2} {'type': 'loss', 'content': 0.05637561157345772, 'timestamp': '2025-10-01 04:24:07.203298', 'step': 9215, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:07.241286', 'step': 9215, 'epoch': 2} {'type': 'loss', 'content': 0.06891167163848877, 'timestamp': '2025-10-01 04:24:07.265017', 'step': 9216, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:07.297623', 'step': 9216, 'epoch': 2} {'type': 'loss', 'content': 0.09358619153499603, 'timestamp': '2025-10-01 04:24:07.299849', 'step': 9217, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:07.339064', 'step': 9217, 'epoch': 2} {'type': 'loss', 'content': 0.09874222427606583, 'timestamp': '2025-10-01 04:24:07.341408', 'step': 9218, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:07.374243', 'step': 9218, 'epoch': 2} {'type': 'loss', 'content': 0.09144873917102814, 'timestamp': '2025-10-01 04:24:07.376315', 'step': 9219, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:24:07.410749', 'step': 9219, 'epoch': 2} {'type': 'loss', 'content': 0.09613106399774551, 'timestamp': '2025-10-01 04:24:07.434548', 'step': 9220, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:07.466216', 'step': 9220, 'epoch': 2} {'type': 'loss', 'content': 0.09483455121517181, 'timestamp': '2025-10-01 04:24:07.468410', 'step': 9221, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:07.502285', 'step': 9221, 'epoch': 2} {'type': 'loss', 'content': 0.09788382798433304, 'timestamp': '2025-10-01 04:24:07.504494', 'step': 9222, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:07.535980', 'step': 9222, 'epoch': 2} {'type': 'loss', 'content': 0.1076091080904007, 'timestamp': '2025-10-01 04:24:07.538520', 'step': 9223, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:07.571601', 'step': 9223, 'epoch': 2} {'type': 'loss', 'content': 0.16209150850772858, 'timestamp': '2025-10-01 04:24:07.595178', 'step': 9224, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:07.631308', 'step': 9224, 'epoch': 2} {'type': 'loss', 'content': 0.10333762317895889, 'timestamp': '2025-10-01 04:24:07.633565', 'step': 9225, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:24:07.666070', 'step': 9225, 'epoch': 2} {'type': 'loss', 'content': 0.10001404583454132, 'timestamp': '2025-10-01 04:24:07.668106', 'step': 9226, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:07.705134', 'step': 9226, 'epoch': 2} {'type': 'loss', 'content': 0.0983678475022316, 'timestamp': '2025-10-01 04:24:07.708177', 'step': 9227, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:07.750779', 'step': 9227, 'epoch': 2} {'type': 'loss', 'content': 0.11854805797338486, 'timestamp': '2025-10-01 04:24:07.774622', 'step': 9228, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:07.808065', 'step': 9228, 'epoch': 2} {'type': 'loss', 'content': 0.10544232279062271, 'timestamp': '2025-10-01 04:24:07.810268', 'step': 9229, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:07.846045', 'step': 9229, 'epoch': 2} {'type': 'loss', 'content': 0.1184857115149498, 'timestamp': '2025-10-01 04:24:07.848241', 'step': 9230, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:07.894588', 'step': 9230, 'epoch': 2} {'type': 'loss', 'content': 0.0997251644730568, 'timestamp': '2025-10-01 04:24:07.896983', 'step': 9231, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:07.937714', 'step': 9231, 'epoch': 2} {'type': 'loss', 'content': 0.059505920857191086, 'timestamp': '2025-10-01 04:24:07.961690', 'step': 9232, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:07.995329', 'step': 9232, 'epoch': 2} {'type': 'loss', 'content': 0.12885871529579163, 'timestamp': '2025-10-01 04:24:08.013522', 'step': 9233, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:24:08.046050', 'step': 9233, 'epoch': 2} {'type': 'loss', 'content': 0.15363365411758423, 'timestamp': '2025-10-01 04:24:08.048299', 'step': 9234, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:08.080858', 'step': 9234, 'epoch': 2} {'type': 'loss', 'content': 0.15893946588039398, 'timestamp': '2025-10-01 04:24:08.083109', 'step': 9235, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:08.115588', 'step': 9235, 'epoch': 2} {'type': 'loss', 'content': 0.15012727677822113, 'timestamp': '2025-10-01 04:24:08.139321', 'step': 9236, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:08.178556', 'step': 9236, 'epoch': 2} {'type': 'loss', 'content': 0.1413263976573944, 'timestamp': '2025-10-01 04:24:08.180764', 'step': 9237, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:08.219332', 'step': 9237, 'epoch': 2} {'type': 'loss', 'content': 0.11396566778421402, 'timestamp': '2025-10-01 04:24:08.221597', 'step': 9238, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:08.253436', 'step': 9238, 'epoch': 2} {'type': 'loss', 'content': 0.16095492243766785, 'timestamp': '2025-10-01 04:24:08.256032', 'step': 9239, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:24:08.288607', 'step': 9239, 'epoch': 2} {'type': 'loss', 'content': 0.10247671604156494, 'timestamp': '2025-10-01 04:24:08.321799', 'step': 9240, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:08.354468', 'step': 9240, 'epoch': 2} {'type': 'loss', 'content': 0.17991723120212555, 'timestamp': '2025-10-01 04:24:08.356576', 'step': 9241, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:08.401180', 'step': 9241, 'epoch': 2} {'type': 'loss', 'content': 0.12731514871120453, 'timestamp': '2025-10-01 04:24:08.403478', 'step': 9242, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:08.435829', 'step': 9242, 'epoch': 2} {'type': 'loss', 'content': 0.08133222907781601, 'timestamp': '2025-10-01 04:24:08.439759', 'step': 9243, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:08.481219', 'step': 9243, 'epoch': 2} {'type': 'loss', 'content': 0.13145966827869415, 'timestamp': '2025-10-01 04:24:08.504963', 'step': 9244, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:24:08.542090', 'step': 9244, 'epoch': 2} {'type': 'loss', 'content': 0.11151577532291412, 'timestamp': '2025-10-01 04:24:08.544025', 'step': 9245, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:08.575798', 'step': 9245, 'epoch': 2} {'type': 'loss', 'content': 0.03910364583134651, 'timestamp': '2025-10-01 04:24:08.578343', 'step': 9246, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:08.614685', 'step': 9246, 'epoch': 2} {'type': 'loss', 'content': 0.09123218059539795, 'timestamp': '2025-10-01 04:24:08.616994', 'step': 9247, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:08.652683', 'step': 9247, 'epoch': 2} {'type': 'loss', 'content': 0.09653662145137787, 'timestamp': '2025-10-01 04:24:08.676899', 'step': 9248, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:08.718939', 'step': 9248, 'epoch': 2} {'type': 'loss', 'content': 0.0987972766160965, 'timestamp': '2025-10-01 04:24:08.720996', 'step': 9249, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:08.763676', 'step': 9249, 'epoch': 2} {'type': 'loss', 'content': 0.16121672093868256, 'timestamp': '2025-10-01 04:24:08.765986', 'step': 9250, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:08.807119', 'step': 9250, 'epoch': 2} {'type': 'loss', 'content': 0.10460828244686127, 'timestamp': '2025-10-01 04:24:08.809266', 'step': 9251, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:08.846548', 'step': 9251, 'epoch': 2} {'type': 'loss', 'content': 0.10898961126804352, 'timestamp': '2025-10-01 04:24:08.870234', 'step': 9252, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:08.902250', 'step': 9252, 'epoch': 2} {'type': 'loss', 'content': 0.08536676317453384, 'timestamp': '2025-10-01 04:24:08.904368', 'step': 9253, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:08.942628', 'step': 9253, 'epoch': 2} {'type': 'loss', 'content': 0.1433897614479065, 'timestamp': '2025-10-01 04:24:08.944992', 'step': 9254, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:08.981482', 'step': 9254, 'epoch': 2} {'type': 'loss', 'content': 0.07543645799160004, 'timestamp': '2025-10-01 04:24:08.983712', 'step': 9255, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:09.015056', 'step': 9255, 'epoch': 2} {'type': 'loss', 'content': 0.19324755668640137, 'timestamp': '2025-10-01 04:24:09.038979', 'step': 9256, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:09.070419', 'step': 9256, 'epoch': 2} {'type': 'loss', 'content': 0.23924031853675842, 'timestamp': '2025-10-01 04:24:09.073087', 'step': 9257, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:24:09.104689', 'step': 9257, 'epoch': 2} {'type': 'loss', 'content': 0.19057974219322205, 'timestamp': '2025-10-01 04:24:09.106991', 'step': 9258, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:09.137793', 'step': 9258, 'epoch': 2} {'type': 'loss', 'content': 0.14957654476165771, 'timestamp': '2025-10-01 04:24:09.140095', 'step': 9259, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:09.171029', 'step': 9259, 'epoch': 2} {'type': 'loss', 'content': 0.08623497188091278, 'timestamp': '2025-10-01 04:24:09.194578', 'step': 9260, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:09.227008', 'step': 9260, 'epoch': 2} {'type': 'loss', 'content': 0.12355191260576248, 'timestamp': '2025-10-01 04:24:09.229275', 'step': 9261, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:09.260407', 'step': 9261, 'epoch': 2} {'type': 'loss', 'content': 0.081168532371521, 'timestamp': '2025-10-01 04:24:09.262485', 'step': 9262, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:09.292906', 'step': 9262, 'epoch': 2} {'type': 'loss', 'content': 0.08042822033166885, 'timestamp': '2025-10-01 04:24:09.294779', 'step': 9263, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:09.324620', 'step': 9263, 'epoch': 2} {'type': 'loss', 'content': 0.1607673019170761, 'timestamp': '2025-10-01 04:24:09.348257', 'step': 9264, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:24:09.378811', 'step': 9264, 'epoch': 2} {'type': 'loss', 'content': 0.18916472792625427, 'timestamp': '2025-10-01 04:24:09.384314', 'step': 9265, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:09.414915', 'step': 9265, 'epoch': 2} {'type': 'loss', 'content': 0.17563657462596893, 'timestamp': '2025-10-01 04:24:09.416991', 'step': 9266, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:09.446706', 'step': 9266, 'epoch': 2} {'type': 'loss', 'content': 0.14525994658470154, 'timestamp': '2025-10-01 04:24:09.448858', 'step': 9267, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:24:09.479060', 'step': 9267, 'epoch': 2} {'type': 'loss', 'content': 0.10633864998817444, 'timestamp': '2025-10-01 04:24:09.502956', 'step': 9268, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:09.533166', 'step': 9268, 'epoch': 2} {'type': 'loss', 'content': 0.10650837421417236, 'timestamp': '2025-10-01 04:24:09.535316', 'step': 9269, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-10-01 04:24:09.565509', 'step': 9269, 'epoch': 2} {'type': 'loss', 'content': 0.13102702796459198, 'timestamp': '2025-10-01 04:24:09.570273', 'step': 9270, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:09.601143', 'step': 9270, 'epoch': 2} {'type': 'loss', 'content': 0.07277721166610718, 'timestamp': '2025-10-01 04:24:09.603260', 'step': 9271, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:09.642537', 'step': 9271, 'epoch': 2} {'type': 'loss', 'content': 0.11562778055667877, 'timestamp': '2025-10-01 04:24:09.666108', 'step': 9272, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:09.700802', 'step': 9272, 'epoch': 2} {'type': 'loss', 'content': 0.0895988941192627, 'timestamp': '2025-10-01 04:24:09.703050', 'step': 9273, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-10-01 04:24:09.734714', 'step': 9273, 'epoch': 2} {'type': 'loss', 'content': 0.0711091160774231, 'timestamp': '2025-10-01 04:24:09.739388', 'step': 9274, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:09.769723', 'step': 9274, 'epoch': 2} {'type': 'loss', 'content': 0.08199885487556458, 'timestamp': '2025-10-01 04:24:09.787977', 'step': 9275, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:09.818063', 'step': 9275, 'epoch': 2} {'type': 'loss', 'content': 0.12921488285064697, 'timestamp': '2025-10-01 04:24:09.841650', 'step': 9276, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:09.873932', 'step': 9276, 'epoch': 2} {'type': 'loss', 'content': 0.11566950380802155, 'timestamp': '2025-10-01 04:24:09.876285', 'step': 9277, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:09.908258', 'step': 9277, 'epoch': 2} {'type': 'loss', 'content': 0.0647018626332283, 'timestamp': '2025-10-01 04:24:09.910957', 'step': 9278, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:09.941810', 'step': 9278, 'epoch': 2} {'type': 'loss', 'content': 0.15537719428539276, 'timestamp': '2025-10-01 04:24:09.955752', 'step': 9279, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:10.003176', 'step': 9279, 'epoch': 2} {'type': 'loss', 'content': 0.14371328055858612, 'timestamp': '2025-10-01 04:24:10.030745', 'step': 9280, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:10.064406', 'step': 9280, 'epoch': 2} {'type': 'loss', 'content': 0.07209514826536179, 'timestamp': '2025-10-01 04:24:10.066569', 'step': 9281, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:10.096135', 'step': 9281, 'epoch': 2} {'type': 'loss', 'content': 0.12230808287858963, 'timestamp': '2025-10-01 04:24:10.098550', 'step': 9282, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:10.128809', 'step': 9282, 'epoch': 2} {'type': 'loss', 'content': 0.10049498081207275, 'timestamp': '2025-10-01 04:24:10.130861', 'step': 9283, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:10.160918', 'step': 9283, 'epoch': 2} {'type': 'loss', 'content': 0.10802057385444641, 'timestamp': '2025-10-01 04:24:10.184519', 'step': 9284, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:24:10.217069', 'step': 9284, 'epoch': 2} {'type': 'loss', 'content': 0.20043149590492249, 'timestamp': '2025-10-01 04:24:10.219106', 'step': 9285, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:10.250079', 'step': 9285, 'epoch': 2} {'type': 'loss', 'content': 0.13499106466770172, 'timestamp': '2025-10-01 04:24:10.253930', 'step': 9286, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:10.287546', 'step': 9286, 'epoch': 2} {'type': 'loss', 'content': 0.15879572927951813, 'timestamp': '2025-10-01 04:24:10.289557', 'step': 9287, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:10.319450', 'step': 9287, 'epoch': 2} {'type': 'loss', 'content': 0.06847820430994034, 'timestamp': '2025-10-01 04:24:10.342955', 'step': 9288, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:10.381450', 'step': 9288, 'epoch': 2} {'type': 'loss', 'content': 0.07785981893539429, 'timestamp': '2025-10-01 04:24:10.383459', 'step': 9289, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:10.413885', 'step': 9289, 'epoch': 2} {'type': 'loss', 'content': 0.04903043434023857, 'timestamp': '2025-10-01 04:24:10.416123', 'step': 9290, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:10.448945', 'step': 9290, 'epoch': 2} {'type': 'loss', 'content': 0.1446554809808731, 'timestamp': '2025-10-01 04:24:10.451186', 'step': 9291, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:10.488536', 'step': 9291, 'epoch': 2} {'type': 'loss', 'content': 0.13104437291622162, 'timestamp': '2025-10-01 04:24:10.511978', 'step': 9292, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:24:10.553196', 'step': 9292, 'epoch': 2} {'type': 'loss', 'content': 0.17287799715995789, 'timestamp': '2025-10-01 04:24:10.558884', 'step': 9293, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:10.609592', 'step': 9293, 'epoch': 2} {'type': 'loss', 'content': 0.16410306096076965, 'timestamp': '2025-10-01 04:24:10.611748', 'step': 9294, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:10.643859', 'step': 9294, 'epoch': 2} {'type': 'loss', 'content': 0.06441857665777206, 'timestamp': '2025-10-01 04:24:10.646292', 'step': 9295, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:10.678032', 'step': 9295, 'epoch': 2} {'type': 'loss', 'content': 0.1692844033241272, 'timestamp': '2025-10-01 04:24:10.701480', 'step': 9296, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:10.732616', 'step': 9296, 'epoch': 2} {'type': 'loss', 'content': 0.05594746768474579, 'timestamp': '2025-10-01 04:24:10.734737', 'step': 9297, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:24:10.765642', 'step': 9297, 'epoch': 2} {'type': 'loss', 'content': 0.15711042284965515, 'timestamp': '2025-10-01 04:24:10.767744', 'step': 9298, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:10.798049', 'step': 9298, 'epoch': 2} {'type': 'loss', 'content': 0.20014123618602753, 'timestamp': '2025-10-01 04:24:10.800193', 'step': 9299, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:10.830434', 'step': 9299, 'epoch': 2} {'type': 'loss', 'content': 0.1908518373966217, 'timestamp': '2025-10-01 04:24:10.854629', 'step': 9300, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:10.885235', 'step': 9300, 'epoch': 2} {'type': 'loss', 'content': 0.0751861035823822, 'timestamp': '2025-10-01 04:24:10.887222', 'step': 9301, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:10.917908', 'step': 9301, 'epoch': 2} {'type': 'loss', 'content': 0.11409670859575272, 'timestamp': '2025-10-01 04:24:10.920936', 'step': 9302, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:10.950431', 'step': 9302, 'epoch': 2} {'type': 'loss', 'content': 0.09043703228235245, 'timestamp': '2025-10-01 04:24:10.953132', 'step': 9303, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:10.986558', 'step': 9303, 'epoch': 2} {'type': 'loss', 'content': 0.07031136006116867, 'timestamp': '2025-10-01 04:24:11.010354', 'step': 9304, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:11.041965', 'step': 9304, 'epoch': 2} {'type': 'loss', 'content': 0.10704436153173447, 'timestamp': '2025-10-01 04:24:11.044811', 'step': 9305, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:11.076628', 'step': 9305, 'epoch': 2} {'type': 'loss', 'content': 0.10203931480646133, 'timestamp': '2025-10-01 04:24:11.078835', 'step': 9306, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:24:11.109801', 'step': 9306, 'epoch': 2} {'type': 'loss', 'content': 0.0932028740644455, 'timestamp': '2025-10-01 04:24:11.113115', 'step': 9307, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:11.145073', 'step': 9307, 'epoch': 2} {'type': 'loss', 'content': 0.1372395008802414, 'timestamp': '2025-10-01 04:24:11.169118', 'step': 9308, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:11.199823', 'step': 9308, 'epoch': 2} {'type': 'loss', 'content': 0.11662107706069946, 'timestamp': '2025-10-01 04:24:11.202429', 'step': 9309, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:11.233970', 'step': 9309, 'epoch': 2} {'type': 'loss', 'content': 0.0909927487373352, 'timestamp': '2025-10-01 04:24:11.236456', 'step': 9310, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:11.268561', 'step': 9310, 'epoch': 2} {'type': 'loss', 'content': 0.07516787946224213, 'timestamp': '2025-10-01 04:24:11.271205', 'step': 9311, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:11.301353', 'step': 9311, 'epoch': 2} {'type': 'loss', 'content': 0.15803584456443787, 'timestamp': '2025-10-01 04:24:11.326274', 'step': 9312, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:11.360259', 'step': 9312, 'epoch': 2} {'type': 'loss', 'content': 0.12652555108070374, 'timestamp': '2025-10-01 04:24:11.362933', 'step': 9313, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:11.396311', 'step': 9313, 'epoch': 2} {'type': 'loss', 'content': 0.07626382261514664, 'timestamp': '2025-10-01 04:24:11.398648', 'step': 9314, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:11.429935', 'step': 9314, 'epoch': 2} {'type': 'loss', 'content': 0.09295377880334854, 'timestamp': '2025-10-01 04:24:11.432257', 'step': 9315, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:11.466590', 'step': 9315, 'epoch': 2} {'type': 'loss', 'content': 0.08609482645988464, 'timestamp': '2025-10-01 04:24:11.491536', 'step': 9316, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:11.526400', 'step': 9316, 'epoch': 2} {'type': 'loss', 'content': 0.09879717230796814, 'timestamp': '2025-10-01 04:24:11.528992', 'step': 9317, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:11.561163', 'step': 9317, 'epoch': 2} {'type': 'loss', 'content': 0.0853804349899292, 'timestamp': '2025-10-01 04:24:11.564937', 'step': 9318, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:11.596057', 'step': 9318, 'epoch': 2} {'type': 'loss', 'content': 0.062460556626319885, 'timestamp': '2025-10-01 04:24:11.598853', 'step': 9319, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:11.630106', 'step': 9319, 'epoch': 2} {'type': 'loss', 'content': 0.1008630245923996, 'timestamp': '2025-10-01 04:24:11.654222', 'step': 9320, 'epoch': 2} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 949202279808}], 'timestamp': '2025-10-01 04:24:20.175195', 'step': 9320, 'epoch': 2} {'type': 'pplx', 'content': 11332.525042450203, 'timestamp': '2025-10-01 04:24:20.178428', 'step': 9320, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:20.208208', 'step': 9320, 'epoch': 2} {'type': 'loss', 'content': 0.1071830466389656, 'timestamp': '2025-10-01 04:24:20.210714', 'step': 9321, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:20.243042', 'step': 9321, 'epoch': 2} {'type': 'loss', 'content': 0.11514564603567123, 'timestamp': '2025-10-01 04:24:20.246391', 'step': 9322, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:24:20.279344', 'step': 9322, 'epoch': 2} {'type': 'loss', 'content': 0.11746028065681458, 'timestamp': '2025-10-01 04:24:20.281713', 'step': 9323, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:20.312889', 'step': 9323, 'epoch': 2} {'type': 'loss', 'content': 0.2003128081560135, 'timestamp': '2025-10-01 04:24:20.337145', 'step': 9324, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:20.368606', 'step': 9324, 'epoch': 2} {'type': 'loss', 'content': 0.13204814493656158, 'timestamp': '2025-10-01 04:24:20.370917', 'step': 9325, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:20.410590', 'step': 9325, 'epoch': 2} {'type': 'loss', 'content': 0.10289111733436584, 'timestamp': '2025-10-01 04:24:20.414124', 'step': 9326, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:20.444958', 'step': 9326, 'epoch': 2} {'type': 'loss', 'content': 0.07070611417293549, 'timestamp': '2025-10-01 04:24:20.447383', 'step': 9327, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:20.481048', 'step': 9327, 'epoch': 2} {'type': 'loss', 'content': 0.17119845747947693, 'timestamp': '2025-10-01 04:24:20.505280', 'step': 9328, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:20.536071', 'step': 9328, 'epoch': 2} {'type': 'loss', 'content': 0.09633006900548935, 'timestamp': '2025-10-01 04:24:20.538540', 'step': 9329, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:20.569371', 'step': 9329, 'epoch': 2} {'type': 'loss', 'content': 0.103012815117836, 'timestamp': '2025-10-01 04:24:20.571747', 'step': 9330, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:20.602896', 'step': 9330, 'epoch': 2} {'type': 'loss', 'content': 0.1400943547487259, 'timestamp': '2025-10-01 04:24:20.609538', 'step': 9331, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:20.641398', 'step': 9331, 'epoch': 2} {'type': 'loss', 'content': 0.12405717372894287, 'timestamp': '2025-10-01 04:24:20.678072', 'step': 9332, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:20.709448', 'step': 9332, 'epoch': 2} {'type': 'loss', 'content': 0.12860605120658875, 'timestamp': '2025-10-01 04:24:20.711909', 'step': 9333, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:20.742671', 'step': 9333, 'epoch': 2} {'type': 'loss', 'content': 0.05276751518249512, 'timestamp': '2025-10-01 04:24:20.744659', 'step': 9334, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:20.776001', 'step': 9334, 'epoch': 2} {'type': 'loss', 'content': 0.07994551211595535, 'timestamp': '2025-10-01 04:24:20.778482', 'step': 9335, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:20.809499', 'step': 9335, 'epoch': 2} {'type': 'loss', 'content': 0.062086161226034164, 'timestamp': '2025-10-01 04:24:20.834157', 'step': 9336, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:24:20.866075', 'step': 9336, 'epoch': 2} {'type': 'loss', 'content': 0.12643177807331085, 'timestamp': '2025-10-01 04:24:20.868436', 'step': 9337, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:20.899410', 'step': 9337, 'epoch': 2} {'type': 'loss', 'content': 0.0653054416179657, 'timestamp': '2025-10-01 04:24:20.901673', 'step': 9338, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:20.931597', 'step': 9338, 'epoch': 2} {'type': 'loss', 'content': 0.07858411967754364, 'timestamp': '2025-10-01 04:24:20.934323', 'step': 9339, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:20.965799', 'step': 9339, 'epoch': 2} {'type': 'loss', 'content': 0.08294349163770676, 'timestamp': '2025-10-01 04:24:20.989545', 'step': 9340, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:21.023256', 'step': 9340, 'epoch': 2} {'type': 'loss', 'content': 0.11638268828392029, 'timestamp': '2025-10-01 04:24:21.026469', 'step': 9341, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:21.059546', 'step': 9341, 'epoch': 2} {'type': 'loss', 'content': 0.051177844405174255, 'timestamp': '2025-10-01 04:24:21.062155', 'step': 9342, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:21.096976', 'step': 9342, 'epoch': 2} {'type': 'loss', 'content': 0.1283956617116928, 'timestamp': '2025-10-01 04:24:21.099289', 'step': 9343, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:21.132547', 'step': 9343, 'epoch': 2} {'type': 'loss', 'content': 0.06797156482934952, 'timestamp': '2025-10-01 04:24:21.157198', 'step': 9344, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:21.192049', 'step': 9344, 'epoch': 2} {'type': 'loss', 'content': 0.09936248511075974, 'timestamp': '2025-10-01 04:24:21.194272', 'step': 9345, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:21.225546', 'step': 9345, 'epoch': 2} {'type': 'loss', 'content': 0.06657660752534866, 'timestamp': '2025-10-01 04:24:21.227871', 'step': 9346, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:24:21.258965', 'step': 9346, 'epoch': 2} {'type': 'loss', 'content': 0.1265302300453186, 'timestamp': '2025-10-01 04:24:21.261253', 'step': 9347, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:24:21.292888', 'step': 9347, 'epoch': 2} {'type': 'loss', 'content': 0.12473641335964203, 'timestamp': '2025-10-01 04:24:21.316869', 'step': 9348, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:21.349225', 'step': 9348, 'epoch': 2} {'type': 'loss', 'content': 0.059425897896289825, 'timestamp': '2025-10-01 04:24:21.351382', 'step': 9349, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:21.383447', 'step': 9349, 'epoch': 2} {'type': 'loss', 'content': 0.1666732132434845, 'timestamp': '2025-10-01 04:24:21.385937', 'step': 9350, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:24:21.422531', 'step': 9350, 'epoch': 2} {'type': 'loss', 'content': 0.08095835894346237, 'timestamp': '2025-10-01 04:24:21.425272', 'step': 9351, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:21.456104', 'step': 9351, 'epoch': 2} {'type': 'loss', 'content': 0.07855602353811264, 'timestamp': '2025-10-01 04:24:21.480197', 'step': 9352, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:21.509919', 'step': 9352, 'epoch': 2} {'type': 'loss', 'content': 0.06313598155975342, 'timestamp': '2025-10-01 04:24:21.512154', 'step': 9353, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:21.542529', 'step': 9353, 'epoch': 2} {'type': 'loss', 'content': 0.10035598278045654, 'timestamp': '2025-10-01 04:24:21.545004', 'step': 9354, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:24:21.576198', 'step': 9354, 'epoch': 2} {'type': 'loss', 'content': 0.11244689673185349, 'timestamp': '2025-10-01 04:24:21.578738', 'step': 9355, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:21.609324', 'step': 9355, 'epoch': 2} {'type': 'loss', 'content': 0.14021562039852142, 'timestamp': '2025-10-01 04:24:21.641418', 'step': 9356, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:21.679725', 'step': 9356, 'epoch': 2} {'type': 'loss', 'content': 0.05605246499180794, 'timestamp': '2025-10-01 04:24:21.683304', 'step': 9357, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:21.714264', 'step': 9357, 'epoch': 2} {'type': 'loss', 'content': 0.059020716696977615, 'timestamp': '2025-10-01 04:24:21.716591', 'step': 9358, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:21.747324', 'step': 9358, 'epoch': 2} {'type': 'loss', 'content': 0.10373474657535553, 'timestamp': '2025-10-01 04:24:21.750021', 'step': 9359, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:21.779814', 'step': 9359, 'epoch': 2} {'type': 'loss', 'content': 0.09493831545114517, 'timestamp': '2025-10-01 04:24:21.803594', 'step': 9360, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:21.833743', 'step': 9360, 'epoch': 2} {'type': 'loss', 'content': 0.15638034045696259, 'timestamp': '2025-10-01 04:24:21.835753', 'step': 9361, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:21.866506', 'step': 9361, 'epoch': 2} {'type': 'loss', 'content': 0.16853547096252441, 'timestamp': '2025-10-01 04:24:21.869971', 'step': 9362, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:21.900462', 'step': 9362, 'epoch': 2} {'type': 'loss', 'content': 0.20102155208587646, 'timestamp': '2025-10-01 04:24:21.902507', 'step': 9363, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:21.933289', 'step': 9363, 'epoch': 2} {'type': 'loss', 'content': 0.21202769875526428, 'timestamp': '2025-10-01 04:24:21.956811', 'step': 9364, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:21.993054', 'step': 9364, 'epoch': 2} {'type': 'loss', 'content': 0.1032983809709549, 'timestamp': '2025-10-01 04:24:21.995106', 'step': 9365, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:22.025133', 'step': 9365, 'epoch': 2} {'type': 'loss', 'content': 0.17970721423625946, 'timestamp': '2025-10-01 04:24:22.027158', 'step': 9366, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:22.058410', 'step': 9366, 'epoch': 2} {'type': 'loss', 'content': 0.13848887383937836, 'timestamp': '2025-10-01 04:24:22.060445', 'step': 9367, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:22.090569', 'step': 9367, 'epoch': 2} {'type': 'loss', 'content': 0.13380922377109528, 'timestamp': '2025-10-01 04:24:22.114075', 'step': 9368, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:22.144686', 'step': 9368, 'epoch': 2} {'type': 'loss', 'content': 0.15980158746242523, 'timestamp': '2025-10-01 04:24:22.147923', 'step': 9369, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:24:22.178241', 'step': 9369, 'epoch': 2} {'type': 'loss', 'content': 0.11620088666677475, 'timestamp': '2025-10-01 04:24:22.180616', 'step': 9370, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:22.210601', 'step': 9370, 'epoch': 2} {'type': 'loss', 'content': 0.10183463245630264, 'timestamp': '2025-10-01 04:24:22.212820', 'step': 9371, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:22.243280', 'step': 9371, 'epoch': 2} {'type': 'loss', 'content': 0.11752429604530334, 'timestamp': '2025-10-01 04:24:22.267200', 'step': 9372, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:22.296910', 'step': 9372, 'epoch': 2} {'type': 'loss', 'content': 0.0829346776008606, 'timestamp': '2025-10-01 04:24:22.299361', 'step': 9373, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:22.329463', 'step': 9373, 'epoch': 2} {'type': 'loss', 'content': 0.1347431093454361, 'timestamp': '2025-10-01 04:24:22.331460', 'step': 9374, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:24:22.361561', 'step': 9374, 'epoch': 2} {'type': 'loss', 'content': 0.1580924540758133, 'timestamp': '2025-10-01 04:24:22.363580', 'step': 9375, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:24:22.393679', 'step': 9375, 'epoch': 2} {'type': 'loss', 'content': 0.11633320897817612, 'timestamp': '2025-10-01 04:24:22.417724', 'step': 9376, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:22.448756', 'step': 9376, 'epoch': 2} {'type': 'loss', 'content': 0.11111359298229218, 'timestamp': '2025-10-01 04:24:22.450878', 'step': 9377, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:24:22.480909', 'step': 9377, 'epoch': 2} {'type': 'loss', 'content': 0.14847400784492493, 'timestamp': '2025-10-01 04:24:22.483378', 'step': 9378, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:22.513866', 'step': 9378, 'epoch': 2} {'type': 'loss', 'content': 0.10469478368759155, 'timestamp': '2025-10-01 04:24:22.515927', 'step': 9379, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:22.546316', 'step': 9379, 'epoch': 2} {'type': 'loss', 'content': 0.0697106346487999, 'timestamp': '2025-10-01 04:24:22.570062', 'step': 9380, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:22.600509', 'step': 9380, 'epoch': 2} {'type': 'loss', 'content': 0.14810653030872345, 'timestamp': '2025-10-01 04:24:22.602727', 'step': 9381, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:22.632890', 'step': 9381, 'epoch': 2} {'type': 'loss', 'content': 0.09617990255355835, 'timestamp': '2025-10-01 04:24:22.634979', 'step': 9382, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:22.665366', 'step': 9382, 'epoch': 2} {'type': 'loss', 'content': 0.06952018290758133, 'timestamp': '2025-10-01 04:24:22.675466', 'step': 9383, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:22.705822', 'step': 9383, 'epoch': 2} {'type': 'loss', 'content': 0.17142190039157867, 'timestamp': '2025-10-01 04:24:22.729375', 'step': 9384, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:22.759096', 'step': 9384, 'epoch': 2} {'type': 'loss', 'content': 0.1550932675600052, 'timestamp': '2025-10-01 04:24:22.761305', 'step': 9385, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:24:22.793755', 'step': 9385, 'epoch': 2} {'type': 'loss', 'content': 0.1844065934419632, 'timestamp': '2025-10-01 04:24:22.796252', 'step': 9386, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:24:22.826777', 'step': 9386, 'epoch': 2} {'type': 'loss', 'content': 0.05231057479977608, 'timestamp': '2025-10-01 04:24:22.829711', 'step': 9387, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:22.860034', 'step': 9387, 'epoch': 2} {'type': 'loss', 'content': 0.1700020581483841, 'timestamp': '2025-10-01 04:24:22.894025', 'step': 9388, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:22.931685', 'step': 9388, 'epoch': 2} {'type': 'loss', 'content': 0.13086989521980286, 'timestamp': '2025-10-01 04:24:22.933753', 'step': 9389, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:22.963941', 'step': 9389, 'epoch': 2} {'type': 'loss', 'content': 0.10495861619710922, 'timestamp': '2025-10-01 04:24:22.966378', 'step': 9390, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:24:22.996981', 'step': 9390, 'epoch': 2} {'type': 'loss', 'content': 0.16153475642204285, 'timestamp': '2025-10-01 04:24:22.999136', 'step': 9391, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:23.030035', 'step': 9391, 'epoch': 2} {'type': 'loss', 'content': 0.12277759611606598, 'timestamp': '2025-10-01 04:24:23.053649', 'step': 9392, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:23.087720', 'step': 9392, 'epoch': 2} {'type': 'loss', 'content': 0.16399061679840088, 'timestamp': '2025-10-01 04:24:23.089571', 'step': 9393, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:23.121172', 'step': 9393, 'epoch': 2} {'type': 'loss', 'content': 0.10355301946401596, 'timestamp': '2025-10-01 04:24:23.123274', 'step': 9394, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:23.154049', 'step': 9394, 'epoch': 2} {'type': 'loss', 'content': 0.10325727611780167, 'timestamp': '2025-10-01 04:24:23.156071', 'step': 9395, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:23.189695', 'step': 9395, 'epoch': 2} {'type': 'loss', 'content': 0.1673789620399475, 'timestamp': '2025-10-01 04:24:23.216212', 'step': 9396, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:24:23.249396', 'step': 9396, 'epoch': 2} {'type': 'loss', 'content': 0.0775604322552681, 'timestamp': '2025-10-01 04:24:23.251733', 'step': 9397, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:23.282758', 'step': 9397, 'epoch': 2} {'type': 'loss', 'content': 0.1767638474702835, 'timestamp': '2025-10-01 04:24:23.284777', 'step': 9398, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:23.316627', 'step': 9398, 'epoch': 2} {'type': 'loss', 'content': 0.15669570863246918, 'timestamp': '2025-10-01 04:24:23.322148', 'step': 9399, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:23.353846', 'step': 9399, 'epoch': 2} {'type': 'loss', 'content': 0.14302347600460052, 'timestamp': '2025-10-01 04:24:23.378383', 'step': 9400, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:23.409423', 'step': 9400, 'epoch': 2} {'type': 'loss', 'content': 0.18783243000507355, 'timestamp': '2025-10-01 04:24:23.411733', 'step': 9401, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:23.443684', 'step': 9401, 'epoch': 2} {'type': 'loss', 'content': 0.12883341312408447, 'timestamp': '2025-10-01 04:24:23.450642', 'step': 9402, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:23.482295', 'step': 9402, 'epoch': 2} {'type': 'loss', 'content': 0.11064893752336502, 'timestamp': '2025-10-01 04:24:23.484368', 'step': 9403, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:23.517714', 'step': 9403, 'epoch': 2} {'type': 'loss', 'content': 0.19675756990909576, 'timestamp': '2025-10-01 04:24:23.541621', 'step': 9404, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:23.576690', 'step': 9404, 'epoch': 2} {'type': 'loss', 'content': 0.1988629400730133, 'timestamp': '2025-10-01 04:24:23.579079', 'step': 9405, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:23.610685', 'step': 9405, 'epoch': 2} {'type': 'loss', 'content': 0.10939369350671768, 'timestamp': '2025-10-01 04:24:23.612739', 'step': 9406, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:23.647740', 'step': 9406, 'epoch': 2} {'type': 'loss', 'content': 0.13928857445716858, 'timestamp': '2025-10-01 04:24:23.650343', 'step': 9407, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:23.682009', 'step': 9407, 'epoch': 2} {'type': 'loss', 'content': 0.12639328837394714, 'timestamp': '2025-10-01 04:24:23.705623', 'step': 9408, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:23.735495', 'step': 9408, 'epoch': 2} {'type': 'loss', 'content': 0.25835755467414856, 'timestamp': '2025-10-01 04:24:23.737478', 'step': 9409, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:23.768029', 'step': 9409, 'epoch': 2} {'type': 'loss', 'content': 0.0925133153796196, 'timestamp': '2025-10-01 04:24:23.769938', 'step': 9410, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:23.800790', 'step': 9410, 'epoch': 2} {'type': 'loss', 'content': 0.22413182258605957, 'timestamp': '2025-10-01 04:24:23.802916', 'step': 9411, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:23.834933', 'step': 9411, 'epoch': 2} {'type': 'loss', 'content': 0.08929184079170227, 'timestamp': '2025-10-01 04:24:23.859927', 'step': 9412, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:24:23.891626', 'step': 9412, 'epoch': 2} {'type': 'loss', 'content': 0.11511334776878357, 'timestamp': '2025-10-01 04:24:23.893780', 'step': 9413, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:23.925477', 'step': 9413, 'epoch': 2} {'type': 'loss', 'content': 0.1019115224480629, 'timestamp': '2025-10-01 04:24:23.928102', 'step': 9414, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:23.958652', 'step': 9414, 'epoch': 2} {'type': 'loss', 'content': 0.10661973804235458, 'timestamp': '2025-10-01 04:24:23.960870', 'step': 9415, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:23.992143', 'step': 9415, 'epoch': 2} {'type': 'loss', 'content': 0.09884162992238998, 'timestamp': '2025-10-01 04:24:24.022790', 'step': 9416, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:24.054528', 'step': 9416, 'epoch': 2} {'type': 'loss', 'content': 0.13709504902362823, 'timestamp': '2025-10-01 04:24:24.056561', 'step': 9417, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:24.087685', 'step': 9417, 'epoch': 2} {'type': 'loss', 'content': 0.07987703382968903, 'timestamp': '2025-10-01 04:24:24.089752', 'step': 9418, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:24.122385', 'step': 9418, 'epoch': 2} {'type': 'loss', 'content': 0.12144958227872849, 'timestamp': '2025-10-01 04:24:24.124253', 'step': 9419, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:24.155457', 'step': 9419, 'epoch': 2} {'type': 'loss', 'content': 0.11584919691085815, 'timestamp': '2025-10-01 04:24:24.179026', 'step': 9420, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:24.210377', 'step': 9420, 'epoch': 2} {'type': 'loss', 'content': 0.08149266242980957, 'timestamp': '2025-10-01 04:24:24.214334', 'step': 9421, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:24:24.244323', 'step': 9421, 'epoch': 2} {'type': 'loss', 'content': 0.08440552651882172, 'timestamp': '2025-10-01 04:24:24.252458', 'step': 9422, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:24.283097', 'step': 9422, 'epoch': 2} {'type': 'loss', 'content': 0.08079633861780167, 'timestamp': '2025-10-01 04:24:24.285124', 'step': 9423, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:24.317205', 'step': 9423, 'epoch': 2} {'type': 'loss', 'content': 0.08883202075958252, 'timestamp': '2025-10-01 04:24:24.341012', 'step': 9424, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:24.374700', 'step': 9424, 'epoch': 2} {'type': 'loss', 'content': 0.07148585468530655, 'timestamp': '2025-10-01 04:24:24.376991', 'step': 9425, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:24.409687', 'step': 9425, 'epoch': 2} {'type': 'loss', 'content': 0.10117340832948685, 'timestamp': '2025-10-01 04:24:24.412412', 'step': 9426, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:24.442580', 'step': 9426, 'epoch': 2} {'type': 'loss', 'content': 0.2299753576517105, 'timestamp': '2025-10-01 04:24:24.444943', 'step': 9427, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:24.477273', 'step': 9427, 'epoch': 2} {'type': 'loss', 'content': 0.10719092190265656, 'timestamp': '2025-10-01 04:24:24.501145', 'step': 9428, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:24:24.532504', 'step': 9428, 'epoch': 2} {'type': 'loss', 'content': 0.09367441385984421, 'timestamp': '2025-10-01 04:24:24.534640', 'step': 9429, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:24.568945', 'step': 9429, 'epoch': 2} {'type': 'loss', 'content': 0.1222991943359375, 'timestamp': '2025-10-01 04:24:24.581443', 'step': 9430, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:24.613493', 'step': 9430, 'epoch': 2} {'type': 'loss', 'content': 0.13900741934776306, 'timestamp': '2025-10-01 04:24:24.616326', 'step': 9431, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:24.647316', 'step': 9431, 'epoch': 2} {'type': 'loss', 'content': 0.08879522979259491, 'timestamp': '2025-10-01 04:24:24.670971', 'step': 9432, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:24.706427', 'step': 9432, 'epoch': 2} {'type': 'loss', 'content': 0.09042885154485703, 'timestamp': '2025-10-01 04:24:24.708499', 'step': 9433, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:24.739102', 'step': 9433, 'epoch': 2} {'type': 'loss', 'content': 0.12333657592535019, 'timestamp': '2025-10-01 04:24:24.741177', 'step': 9434, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:24.772325', 'step': 9434, 'epoch': 2} {'type': 'loss', 'content': 0.08133327960968018, 'timestamp': '2025-10-01 04:24:24.774594', 'step': 9435, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:24:24.805225', 'step': 9435, 'epoch': 2} {'type': 'loss', 'content': 0.10370371490716934, 'timestamp': '2025-10-01 04:24:24.828984', 'step': 9436, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:24.859870', 'step': 9436, 'epoch': 2} {'type': 'loss', 'content': 0.10328693687915802, 'timestamp': '2025-10-01 04:24:24.862229', 'step': 9437, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:24.893300', 'step': 9437, 'epoch': 2} {'type': 'loss', 'content': 0.11835138499736786, 'timestamp': '2025-10-01 04:24:24.895368', 'step': 9438, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:24.925433', 'step': 9438, 'epoch': 2} {'type': 'loss', 'content': 0.164521723985672, 'timestamp': '2025-10-01 04:24:24.928101', 'step': 9439, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:24.958208', 'step': 9439, 'epoch': 2} {'type': 'loss', 'content': 0.11077453941106796, 'timestamp': '2025-10-01 04:24:24.981787', 'step': 9440, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:25.021269', 'step': 9440, 'epoch': 2} {'type': 'loss', 'content': 0.10113025456666946, 'timestamp': '2025-10-01 04:24:25.023309', 'step': 9441, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:24:25.054008', 'step': 9441, 'epoch': 2} {'type': 'loss', 'content': 0.20263956487178802, 'timestamp': '2025-10-01 04:24:25.057663', 'step': 9442, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:25.088337', 'step': 9442, 'epoch': 2} {'type': 'loss', 'content': 0.11127963662147522, 'timestamp': '2025-10-01 04:24:25.090499', 'step': 9443, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:25.122740', 'step': 9443, 'epoch': 2} {'type': 'loss', 'content': 0.1254912167787552, 'timestamp': '2025-10-01 04:24:25.146884', 'step': 9444, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:25.178454', 'step': 9444, 'epoch': 2} {'type': 'loss', 'content': 0.06218496710062027, 'timestamp': '2025-10-01 04:24:25.180781', 'step': 9445, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:25.210930', 'step': 9445, 'epoch': 2} {'type': 'loss', 'content': 0.07999194413423538, 'timestamp': '2025-10-01 04:24:25.213439', 'step': 9446, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:25.245584', 'step': 9446, 'epoch': 2} {'type': 'loss', 'content': 0.1201184093952179, 'timestamp': '2025-10-01 04:24:25.247634', 'step': 9447, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:25.284292', 'step': 9447, 'epoch': 2} {'type': 'loss', 'content': 0.10074570029973984, 'timestamp': '2025-10-01 04:24:25.308098', 'step': 9448, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:25.338295', 'step': 9448, 'epoch': 2} {'type': 'loss', 'content': 0.1935901939868927, 'timestamp': '2025-10-01 04:24:25.340338', 'step': 9449, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:25.372174', 'step': 9449, 'epoch': 2} {'type': 'loss', 'content': 0.05560872703790665, 'timestamp': '2025-10-01 04:24:25.374399', 'step': 9450, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:24:25.406363', 'step': 9450, 'epoch': 2} {'type': 'loss', 'content': 0.11704842746257782, 'timestamp': '2025-10-01 04:24:25.409303', 'step': 9451, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:25.438923', 'step': 9451, 'epoch': 2} {'type': 'loss', 'content': 0.09069440513849258, 'timestamp': '2025-10-01 04:24:25.462717', 'step': 9452, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:25.493988', 'step': 9452, 'epoch': 2} {'type': 'loss', 'content': 0.0644928514957428, 'timestamp': '2025-10-01 04:24:25.496023', 'step': 9453, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:25.527229', 'step': 9453, 'epoch': 2} {'type': 'loss', 'content': 0.13149695098400116, 'timestamp': '2025-10-01 04:24:25.529526', 'step': 9454, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:25.570802', 'step': 9454, 'epoch': 2} {'type': 'loss', 'content': 0.17096568644046783, 'timestamp': '2025-10-01 04:24:25.572895', 'step': 9455, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:25.604807', 'step': 9455, 'epoch': 2} {'type': 'loss', 'content': 0.12805064022541046, 'timestamp': '2025-10-01 04:24:25.628713', 'step': 9456, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:24:25.673033', 'step': 9456, 'epoch': 2} {'type': 'loss', 'content': 0.17387571930885315, 'timestamp': '2025-10-01 04:24:25.675219', 'step': 9457, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:25.708172', 'step': 9457, 'epoch': 2} {'type': 'loss', 'content': 0.12010107189416885, 'timestamp': '2025-10-01 04:24:25.711026', 'step': 9458, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:25.742791', 'step': 9458, 'epoch': 2} {'type': 'loss', 'content': 0.08104909211397171, 'timestamp': '2025-10-01 04:24:25.745365', 'step': 9459, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:25.787086', 'step': 9459, 'epoch': 2} {'type': 'loss', 'content': 0.09270068258047104, 'timestamp': '2025-10-01 04:24:25.818889', 'step': 9460, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:25.850524', 'step': 9460, 'epoch': 2} {'type': 'loss', 'content': 0.04993777722120285, 'timestamp': '2025-10-01 04:24:25.852981', 'step': 9461, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:25.885433', 'step': 9461, 'epoch': 2} {'type': 'loss', 'content': 0.14700843393802643, 'timestamp': '2025-10-01 04:24:25.887462', 'step': 9462, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:25.918188', 'step': 9462, 'epoch': 2} {'type': 'loss', 'content': 0.12017706036567688, 'timestamp': '2025-10-01 04:24:25.928643', 'step': 9463, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:24:25.959719', 'step': 9463, 'epoch': 2} {'type': 'loss', 'content': 0.1515280157327652, 'timestamp': '2025-10-01 04:24:25.982964', 'step': 9464, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:26.018048', 'step': 9464, 'epoch': 2} {'type': 'loss', 'content': 0.06302747875452042, 'timestamp': '2025-10-01 04:24:26.020106', 'step': 9465, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:26.052148', 'step': 9465, 'epoch': 2} {'type': 'loss', 'content': 0.08205224573612213, 'timestamp': '2025-10-01 04:24:26.054130', 'step': 9466, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:24:26.083608', 'step': 9466, 'epoch': 2} {'type': 'loss', 'content': 0.1997491866350174, 'timestamp': '2025-10-01 04:24:26.085737', 'step': 9467, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:24:26.117687', 'step': 9467, 'epoch': 2} {'type': 'loss', 'content': 0.17292766273021698, 'timestamp': '2025-10-01 04:24:26.141046', 'step': 9468, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:26.172230', 'step': 9468, 'epoch': 2} {'type': 'loss', 'content': 0.08149491995573044, 'timestamp': '2025-10-01 04:24:26.174400', 'step': 9469, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:26.205847', 'step': 9469, 'epoch': 2} {'type': 'loss', 'content': 0.1462773084640503, 'timestamp': '2025-10-01 04:24:26.213958', 'step': 9470, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:26.244269', 'step': 9470, 'epoch': 2} {'type': 'loss', 'content': 0.06762692332267761, 'timestamp': '2025-10-01 04:24:26.246204', 'step': 9471, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:26.276690', 'step': 9471, 'epoch': 2} {'type': 'loss', 'content': 0.10333020240068436, 'timestamp': '2025-10-01 04:24:26.300760', 'step': 9472, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:24:26.331599', 'step': 9472, 'epoch': 2} {'type': 'loss', 'content': 0.21846361458301544, 'timestamp': '2025-10-01 04:24:26.333759', 'step': 9473, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:26.365034', 'step': 9473, 'epoch': 2} {'type': 'loss', 'content': 0.1256718784570694, 'timestamp': '2025-10-01 04:24:26.377891', 'step': 9474, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:26.408548', 'step': 9474, 'epoch': 2} {'type': 'loss', 'content': 0.11520131677389145, 'timestamp': '2025-10-01 04:24:26.410688', 'step': 9475, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:26.443007', 'step': 9475, 'epoch': 2} {'type': 'loss', 'content': 0.06606074422597885, 'timestamp': '2025-10-01 04:24:26.472469', 'step': 9476, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:26.502985', 'step': 9476, 'epoch': 2} {'type': 'loss', 'content': 0.07368847727775574, 'timestamp': '2025-10-01 04:24:26.505447', 'step': 9477, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:24:26.535804', 'step': 9477, 'epoch': 2} {'type': 'loss', 'content': 0.15432371199131012, 'timestamp': '2025-10-01 04:24:26.537875', 'step': 9478, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:26.568524', 'step': 9478, 'epoch': 2} {'type': 'loss', 'content': 0.09487113356590271, 'timestamp': '2025-10-01 04:24:26.570639', 'step': 9479, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:26.601732', 'step': 9479, 'epoch': 2} {'type': 'loss', 'content': 0.19769704341888428, 'timestamp': '2025-10-01 04:24:26.625250', 'step': 9480, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:26.662871', 'step': 9480, 'epoch': 2} {'type': 'loss', 'content': 0.09402743726968765, 'timestamp': '2025-10-01 04:24:26.664980', 'step': 9481, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:26.694795', 'step': 9481, 'epoch': 2} {'type': 'loss', 'content': 0.1349174678325653, 'timestamp': '2025-10-01 04:24:26.704568', 'step': 9482, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:26.735095', 'step': 9482, 'epoch': 2} {'type': 'loss', 'content': 0.15169279277324677, 'timestamp': '2025-10-01 04:24:26.737282', 'step': 9483, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:26.768387', 'step': 9483, 'epoch': 2} {'type': 'loss', 'content': 0.12119647860527039, 'timestamp': '2025-10-01 04:24:26.792044', 'step': 9484, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:24:26.823110', 'step': 9484, 'epoch': 2} {'type': 'loss', 'content': 0.16528497636318207, 'timestamp': '2025-10-01 04:24:26.825705', 'step': 9485, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:24:26.855958', 'step': 9485, 'epoch': 2} {'type': 'loss', 'content': 0.0906326025724411, 'timestamp': '2025-10-01 04:24:26.857949', 'step': 9486, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:26.887700', 'step': 9486, 'epoch': 2} {'type': 'loss', 'content': 0.10879718512296677, 'timestamp': '2025-10-01 04:24:26.890264', 'step': 9487, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:26.921173', 'step': 9487, 'epoch': 2} {'type': 'loss', 'content': 0.10468801110982895, 'timestamp': '2025-10-01 04:24:26.944930', 'step': 9488, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:26.975298', 'step': 9488, 'epoch': 2} {'type': 'loss', 'content': 0.13390721380710602, 'timestamp': '2025-10-01 04:24:26.977375', 'step': 9489, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:27.007435', 'step': 9489, 'epoch': 2} {'type': 'loss', 'content': 0.11083510518074036, 'timestamp': '2025-10-01 04:24:27.009485', 'step': 9490, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:27.040683', 'step': 9490, 'epoch': 2} {'type': 'loss', 'content': 0.04328524321317673, 'timestamp': '2025-10-01 04:24:27.042750', 'step': 9491, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:27.073685', 'step': 9491, 'epoch': 2} {'type': 'loss', 'content': 0.08882364630699158, 'timestamp': '2025-10-01 04:24:27.112341', 'step': 9492, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:27.151356', 'step': 9492, 'epoch': 2} {'type': 'loss', 'content': 0.21866905689239502, 'timestamp': '2025-10-01 04:24:27.165667', 'step': 9493, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:27.195757', 'step': 9493, 'epoch': 2} {'type': 'loss', 'content': 0.11654284596443176, 'timestamp': '2025-10-01 04:24:27.206765', 'step': 9494, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:27.238023', 'step': 9494, 'epoch': 2} {'type': 'loss', 'content': 0.06385654211044312, 'timestamp': '2025-10-01 04:24:27.242945', 'step': 9495, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:27.273183', 'step': 9495, 'epoch': 2} {'type': 'loss', 'content': 0.15697069466114044, 'timestamp': '2025-10-01 04:24:27.296860', 'step': 9496, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:27.327529', 'step': 9496, 'epoch': 2} {'type': 'loss', 'content': 0.08577975630760193, 'timestamp': '2025-10-01 04:24:27.329679', 'step': 9497, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:27.361426', 'step': 9497, 'epoch': 2} {'type': 'loss', 'content': 0.15540502965450287, 'timestamp': '2025-10-01 04:24:27.364131', 'step': 9498, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:27.394445', 'step': 9498, 'epoch': 2} {'type': 'loss', 'content': 0.186123326420784, 'timestamp': '2025-10-01 04:24:27.396899', 'step': 9499, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:27.427210', 'step': 9499, 'epoch': 2} {'type': 'loss', 'content': 0.10501914471387863, 'timestamp': '2025-10-01 04:24:27.451005', 'step': 9500, 'epoch': 2} {'type': 'info', 'content': 'Checkpoint saved at step 9500', 'timestamp': '2025-10-01 04:24:33.758672', 'step': 9500, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:33.789580', 'step': 9500, 'epoch': 2} {'type': 'loss', 'content': 0.16855722665786743, 'timestamp': '2025-10-01 04:24:33.792052', 'step': 9501, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:33.824722', 'step': 9501, 'epoch': 2} {'type': 'loss', 'content': 0.08412615954875946, 'timestamp': '2025-10-01 04:24:33.826981', 'step': 9502, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:33.857685', 'step': 9502, 'epoch': 2} {'type': 'loss', 'content': 0.11183090507984161, 'timestamp': '2025-10-01 04:24:33.860673', 'step': 9503, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:24:33.893214', 'step': 9503, 'epoch': 2} {'type': 'loss', 'content': 0.06467397511005402, 'timestamp': '2025-10-01 04:24:33.917393', 'step': 9504, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:24:33.950316', 'step': 9504, 'epoch': 2} {'type': 'loss', 'content': 0.11567256599664688, 'timestamp': '2025-10-01 04:24:33.952349', 'step': 9505, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:33.984471', 'step': 9505, 'epoch': 2} {'type': 'loss', 'content': 0.024225307628512383, 'timestamp': '2025-10-01 04:24:33.986893', 'step': 9506, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:34.018836', 'step': 9506, 'epoch': 2} {'type': 'loss', 'content': 0.12512074410915375, 'timestamp': '2025-10-01 04:24:34.020866', 'step': 9507, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:34.052870', 'step': 9507, 'epoch': 2} {'type': 'loss', 'content': 0.1726784110069275, 'timestamp': '2025-10-01 04:24:34.076530', 'step': 9508, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:34.109032', 'step': 9508, 'epoch': 2} {'type': 'loss', 'content': 0.06639721989631653, 'timestamp': '2025-10-01 04:24:34.110989', 'step': 9509, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:34.142473', 'step': 9509, 'epoch': 2} {'type': 'loss', 'content': 0.14441177248954773, 'timestamp': '2025-10-01 04:24:34.149333', 'step': 9510, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:34.183950', 'step': 9510, 'epoch': 2} {'type': 'loss', 'content': 0.13405489921569824, 'timestamp': '2025-10-01 04:24:34.186346', 'step': 9511, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:24:34.219738', 'step': 9511, 'epoch': 2} {'type': 'loss', 'content': 0.11147456616163254, 'timestamp': '2025-10-01 04:24:34.249990', 'step': 9512, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:34.282801', 'step': 9512, 'epoch': 2} {'type': 'loss', 'content': 0.2347123771905899, 'timestamp': '2025-10-01 04:24:34.285274', 'step': 9513, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:24:34.317942', 'step': 9513, 'epoch': 2} {'type': 'loss', 'content': 0.1059475764632225, 'timestamp': '2025-10-01 04:24:34.320171', 'step': 9514, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:34.353549', 'step': 9514, 'epoch': 2} {'type': 'loss', 'content': 0.06464715301990509, 'timestamp': '2025-10-01 04:24:34.355936', 'step': 9515, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:34.387958', 'step': 9515, 'epoch': 2} {'type': 'loss', 'content': 0.07257339358329773, 'timestamp': '2025-10-01 04:24:34.411952', 'step': 9516, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:34.442336', 'step': 9516, 'epoch': 2} {'type': 'loss', 'content': 0.1165047138929367, 'timestamp': '2025-10-01 04:24:34.444868', 'step': 9517, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:24:34.609429', 'step': 9517, 'epoch': 2} {'type': 'loss', 'content': 0.1662355661392212, 'timestamp': '2025-10-01 04:24:34.611814', 'step': 9518, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:34.641876', 'step': 9518, 'epoch': 2} {'type': 'loss', 'content': 0.1395493894815445, 'timestamp': '2025-10-01 04:24:34.643937', 'step': 9519, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:34.674888', 'step': 9519, 'epoch': 2} {'type': 'loss', 'content': 0.12012658268213272, 'timestamp': '2025-10-01 04:24:34.700576', 'step': 9520, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:34.732338', 'step': 9520, 'epoch': 2} {'type': 'loss', 'content': 0.09101766347885132, 'timestamp': '2025-10-01 04:24:34.735157', 'step': 9521, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:34.766906', 'step': 9521, 'epoch': 2} {'type': 'loss', 'content': 0.0941113531589508, 'timestamp': '2025-10-01 04:24:34.775121', 'step': 9522, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:34.813238', 'step': 9522, 'epoch': 2} {'type': 'loss', 'content': 0.14637215435504913, 'timestamp': '2025-10-01 04:24:34.815153', 'step': 9523, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:34.846753', 'step': 9523, 'epoch': 2} {'type': 'loss', 'content': 0.18063291907310486, 'timestamp': '2025-10-01 04:24:34.870842', 'step': 9524, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:24:34.904889', 'step': 9524, 'epoch': 2} {'type': 'loss', 'content': 0.12892739474773407, 'timestamp': '2025-10-01 04:24:34.906944', 'step': 9525, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:24:34.938331', 'step': 9525, 'epoch': 2} {'type': 'loss', 'content': 0.10710538923740387, 'timestamp': '2025-10-01 04:24:34.940728', 'step': 9526, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:34.971925', 'step': 9526, 'epoch': 2} {'type': 'loss', 'content': 0.14584411680698395, 'timestamp': '2025-10-01 04:24:34.974129', 'step': 9527, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:35.006707', 'step': 9527, 'epoch': 2} {'type': 'loss', 'content': 0.12808933854103088, 'timestamp': '2025-10-01 04:24:35.030219', 'step': 9528, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:35.062008', 'step': 9528, 'epoch': 2} {'type': 'loss', 'content': 0.14453041553497314, 'timestamp': '2025-10-01 04:24:35.064018', 'step': 9529, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:35.099136', 'step': 9529, 'epoch': 2} {'type': 'loss', 'content': 0.10924024879932404, 'timestamp': '2025-10-01 04:24:35.101372', 'step': 9530, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:35.131686', 'step': 9530, 'epoch': 2} {'type': 'loss', 'content': 0.17257291078567505, 'timestamp': '2025-10-01 04:24:35.134279', 'step': 9531, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:35.167154', 'step': 9531, 'epoch': 2} {'type': 'loss', 'content': 0.14714676141738892, 'timestamp': '2025-10-01 04:24:35.191273', 'step': 9532, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:35.221954', 'step': 9532, 'epoch': 2} {'type': 'loss', 'content': 0.10730145126581192, 'timestamp': '2025-10-01 04:24:35.224005', 'step': 9533, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:24:35.255170', 'step': 9533, 'epoch': 2} {'type': 'loss', 'content': 0.0954069271683693, 'timestamp': '2025-10-01 04:24:35.268252', 'step': 9534, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:35.298339', 'step': 9534, 'epoch': 2} {'type': 'loss', 'content': 0.08054357767105103, 'timestamp': '2025-10-01 04:24:35.300328', 'step': 9535, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:35.330684', 'step': 9535, 'epoch': 2} {'type': 'loss', 'content': 0.07953158766031265, 'timestamp': '2025-10-01 04:24:35.356240', 'step': 9536, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:35.387859', 'step': 9536, 'epoch': 2} {'type': 'loss', 'content': 0.1782609522342682, 'timestamp': '2025-10-01 04:24:35.389899', 'step': 9537, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:35.420183', 'step': 9537, 'epoch': 2} {'type': 'loss', 'content': 0.14798946678638458, 'timestamp': '2025-10-01 04:24:35.428672', 'step': 9538, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:24:35.459747', 'step': 9538, 'epoch': 2} {'type': 'loss', 'content': 0.10408241301774979, 'timestamp': '2025-10-01 04:24:35.462245', 'step': 9539, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:35.492974', 'step': 9539, 'epoch': 2} {'type': 'loss', 'content': 0.12767276167869568, 'timestamp': '2025-10-01 04:24:35.516474', 'step': 9540, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:35.546472', 'step': 9540, 'epoch': 2} {'type': 'loss', 'content': 0.08909877389669418, 'timestamp': '2025-10-01 04:24:35.548512', 'step': 9541, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:35.579905', 'step': 9541, 'epoch': 2} {'type': 'loss', 'content': 0.10331664979457855, 'timestamp': '2025-10-01 04:24:35.582067', 'step': 9542, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:35.612629', 'step': 9542, 'epoch': 2} {'type': 'loss', 'content': 0.162761390209198, 'timestamp': '2025-10-01 04:24:35.614746', 'step': 9543, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:35.645026', 'step': 9543, 'epoch': 2} {'type': 'loss', 'content': 0.11637042462825775, 'timestamp': '2025-10-01 04:24:35.668727', 'step': 9544, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:35.702355', 'step': 9544, 'epoch': 2} {'type': 'loss', 'content': 0.06601928174495697, 'timestamp': '2025-10-01 04:24:35.704593', 'step': 9545, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:35.737044', 'step': 9545, 'epoch': 2} {'type': 'loss', 'content': 0.07076819986104965, 'timestamp': '2025-10-01 04:24:35.740457', 'step': 9546, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:35.782625', 'step': 9546, 'epoch': 2} {'type': 'loss', 'content': 0.12930402159690857, 'timestamp': '2025-10-01 04:24:35.798095', 'step': 9547, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:35.829759', 'step': 9547, 'epoch': 2} {'type': 'loss', 'content': 0.07966737449169159, 'timestamp': '2025-10-01 04:24:35.853372', 'step': 9548, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:35.883623', 'step': 9548, 'epoch': 2} {'type': 'loss', 'content': 0.10537512600421906, 'timestamp': '2025-10-01 04:24:35.885735', 'step': 9549, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:35.915571', 'step': 9549, 'epoch': 2} {'type': 'loss', 'content': 0.04732644930481911, 'timestamp': '2025-10-01 04:24:35.920018', 'step': 9550, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:35.951520', 'step': 9550, 'epoch': 2} {'type': 'loss', 'content': 0.13874535262584686, 'timestamp': '2025-10-01 04:24:35.955263', 'step': 9551, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:35.986184', 'step': 9551, 'epoch': 2} {'type': 'loss', 'content': 0.15554802119731903, 'timestamp': '2025-10-01 04:24:36.009935', 'step': 9552, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:36.040900', 'step': 9552, 'epoch': 2} {'type': 'loss', 'content': 0.1377507597208023, 'timestamp': '2025-10-01 04:24:36.043167', 'step': 9553, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:36.080231', 'step': 9553, 'epoch': 2} {'type': 'loss', 'content': 0.17427146434783936, 'timestamp': '2025-10-01 04:24:36.082274', 'step': 9554, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:36.113106', 'step': 9554, 'epoch': 2} {'type': 'loss', 'content': 0.0782175362110138, 'timestamp': '2025-10-01 04:24:36.118801', 'step': 9555, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:36.151209', 'step': 9555, 'epoch': 2} {'type': 'loss', 'content': 0.08779985457658768, 'timestamp': '2025-10-01 04:24:36.176115', 'step': 9556, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:36.207224', 'step': 9556, 'epoch': 2} {'type': 'loss', 'content': 0.15574592351913452, 'timestamp': '2025-10-01 04:24:36.209205', 'step': 9557, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:36.240157', 'step': 9557, 'epoch': 2} {'type': 'loss', 'content': 0.2059728503227234, 'timestamp': '2025-10-01 04:24:36.242460', 'step': 9558, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:36.272981', 'step': 9558, 'epoch': 2} {'type': 'loss', 'content': 0.10538516193628311, 'timestamp': '2025-10-01 04:24:36.275063', 'step': 9559, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:36.315565', 'step': 9559, 'epoch': 2} {'type': 'loss', 'content': 0.1252470314502716, 'timestamp': '2025-10-01 04:24:36.339782', 'step': 9560, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:36.370067', 'step': 9560, 'epoch': 2} {'type': 'loss', 'content': 0.13340607285499573, 'timestamp': '2025-10-01 04:24:36.375189', 'step': 9561, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:36.405938', 'step': 9561, 'epoch': 2} {'type': 'loss', 'content': 0.14325526356697083, 'timestamp': '2025-10-01 04:24:36.408129', 'step': 9562, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:24:36.438574', 'step': 9562, 'epoch': 2} {'type': 'loss', 'content': 0.1289350390434265, 'timestamp': '2025-10-01 04:24:36.441121', 'step': 9563, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:24:36.477089', 'step': 9563, 'epoch': 2} {'type': 'loss', 'content': 0.10939788818359375, 'timestamp': '2025-10-01 04:24:36.501912', 'step': 9564, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:36.531888', 'step': 9564, 'epoch': 2} {'type': 'loss', 'content': 0.14818543195724487, 'timestamp': '2025-10-01 04:24:36.533867', 'step': 9565, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:36.564522', 'step': 9565, 'epoch': 2} {'type': 'loss', 'content': 0.19612440466880798, 'timestamp': '2025-10-01 04:24:36.566575', 'step': 9566, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:36.596989', 'step': 9566, 'epoch': 2} {'type': 'loss', 'content': 0.11566939949989319, 'timestamp': '2025-10-01 04:24:36.599482', 'step': 9567, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:24:36.633194', 'step': 9567, 'epoch': 2} {'type': 'loss', 'content': 0.12185736745595932, 'timestamp': '2025-10-01 04:24:36.656692', 'step': 9568, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:36.693062', 'step': 9568, 'epoch': 2} {'type': 'loss', 'content': 0.09777882695198059, 'timestamp': '2025-10-01 04:24:36.695092', 'step': 9569, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:36.725317', 'step': 9569, 'epoch': 2} {'type': 'loss', 'content': 0.091995008289814, 'timestamp': '2025-10-01 04:24:36.727513', 'step': 9570, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:36.757322', 'step': 9570, 'epoch': 2} {'type': 'loss', 'content': 0.1550690233707428, 'timestamp': '2025-10-01 04:24:36.769130', 'step': 9571, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:36.799642', 'step': 9571, 'epoch': 2} {'type': 'loss', 'content': 0.13149785995483398, 'timestamp': '2025-10-01 04:24:36.823146', 'step': 9572, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:36.855772', 'step': 9572, 'epoch': 2} {'type': 'loss', 'content': 0.11955319344997406, 'timestamp': '2025-10-01 04:24:36.857874', 'step': 9573, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:36.888150', 'step': 9573, 'epoch': 2} {'type': 'loss', 'content': 0.13350725173950195, 'timestamp': '2025-10-01 04:24:36.891008', 'step': 9574, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:24:36.920964', 'step': 9574, 'epoch': 2} {'type': 'loss', 'content': 0.1998504251241684, 'timestamp': '2025-10-01 04:24:36.923889', 'step': 9575, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:36.957437', 'step': 9575, 'epoch': 2} {'type': 'loss', 'content': 0.13450992107391357, 'timestamp': '2025-10-01 04:24:36.982408', 'step': 9576, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:24:37.012911', 'step': 9576, 'epoch': 2} {'type': 'loss', 'content': 0.11583270132541656, 'timestamp': '2025-10-01 04:24:37.018844', 'step': 9577, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:37.054322', 'step': 9577, 'epoch': 2} {'type': 'loss', 'content': 0.10318171232938766, 'timestamp': '2025-10-01 04:24:37.056390', 'step': 9578, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:37.086409', 'step': 9578, 'epoch': 2} {'type': 'loss', 'content': 0.0764397457242012, 'timestamp': '2025-10-01 04:24:37.088536', 'step': 9579, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:37.118633', 'step': 9579, 'epoch': 2} {'type': 'loss', 'content': 0.10927402973175049, 'timestamp': '2025-10-01 04:24:37.142107', 'step': 9580, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:37.172765', 'step': 9580, 'epoch': 2} {'type': 'loss', 'content': 0.09613364934921265, 'timestamp': '2025-10-01 04:24:37.177072', 'step': 9581, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:24:37.209084', 'step': 9581, 'epoch': 2} {'type': 'loss', 'content': 0.12168227881193161, 'timestamp': '2025-10-01 04:24:37.211571', 'step': 9582, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:37.241548', 'step': 9582, 'epoch': 2} {'type': 'loss', 'content': 0.12448439002037048, 'timestamp': '2025-10-01 04:24:37.243560', 'step': 9583, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:37.273989', 'step': 9583, 'epoch': 2} {'type': 'loss', 'content': 0.08475673943758011, 'timestamp': '2025-10-01 04:24:37.303027', 'step': 9584, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:37.332690', 'step': 9584, 'epoch': 2} {'type': 'loss', 'content': 0.1097613200545311, 'timestamp': '2025-10-01 04:24:37.336997', 'step': 9585, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:37.367709', 'step': 9585, 'epoch': 2} {'type': 'loss', 'content': 0.1330990046262741, 'timestamp': '2025-10-01 04:24:37.369985', 'step': 9586, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:37.400240', 'step': 9586, 'epoch': 2} {'type': 'loss', 'content': 0.08369710296392441, 'timestamp': '2025-10-01 04:24:37.402580', 'step': 9587, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:37.432871', 'step': 9587, 'epoch': 2} {'type': 'loss', 'content': 0.20553073287010193, 'timestamp': '2025-10-01 04:24:37.456386', 'step': 9588, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:24:37.487711', 'step': 9588, 'epoch': 2} {'type': 'loss', 'content': 0.12192665785551071, 'timestamp': '2025-10-01 04:24:37.489870', 'step': 9589, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:37.520385', 'step': 9589, 'epoch': 2} {'type': 'loss', 'content': 0.08335499465465546, 'timestamp': '2025-10-01 04:24:37.522988', 'step': 9590, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:37.553088', 'step': 9590, 'epoch': 2} {'type': 'loss', 'content': 0.08801195025444031, 'timestamp': '2025-10-01 04:24:37.555201', 'step': 9591, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:37.585178', 'step': 9591, 'epoch': 2} {'type': 'loss', 'content': 0.20114731788635254, 'timestamp': '2025-10-01 04:24:37.608782', 'step': 9592, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:24:37.638860', 'step': 9592, 'epoch': 2} {'type': 'loss', 'content': 0.07371952384710312, 'timestamp': '2025-10-01 04:24:37.640964', 'step': 9593, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:37.672683', 'step': 9593, 'epoch': 2} {'type': 'loss', 'content': 0.059562310576438904, 'timestamp': '2025-10-01 04:24:37.674651', 'step': 9594, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:24:37.705661', 'step': 9594, 'epoch': 2} {'type': 'loss', 'content': 0.23332050442695618, 'timestamp': '2025-10-01 04:24:37.708071', 'step': 9595, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:37.737854', 'step': 9595, 'epoch': 2} {'type': 'loss', 'content': 0.042857639491558075, 'timestamp': '2025-10-01 04:24:37.761469', 'step': 9596, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:37.791721', 'step': 9596, 'epoch': 2} {'type': 'loss', 'content': 0.12338428199291229, 'timestamp': '2025-10-01 04:24:37.795167', 'step': 9597, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:37.825265', 'step': 9597, 'epoch': 2} {'type': 'loss', 'content': 0.18547122180461884, 'timestamp': '2025-10-01 04:24:37.827370', 'step': 9598, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:37.858224', 'step': 9598, 'epoch': 2} {'type': 'loss', 'content': 0.077193982899189, 'timestamp': '2025-10-01 04:24:37.860696', 'step': 9599, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:37.890590', 'step': 9599, 'epoch': 2} {'type': 'loss', 'content': 0.07979360222816467, 'timestamp': '2025-10-01 04:24:37.917377', 'step': 9600, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:37.947498', 'step': 9600, 'epoch': 2} {'type': 'loss', 'content': 0.1506839096546173, 'timestamp': '2025-10-01 04:24:37.949505', 'step': 9601, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:37.980134', 'step': 9601, 'epoch': 2} {'type': 'loss', 'content': 0.08038878440856934, 'timestamp': '2025-10-01 04:24:37.982883', 'step': 9602, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:24:38.012681', 'step': 9602, 'epoch': 2} {'type': 'loss', 'content': 0.11218228936195374, 'timestamp': '2025-10-01 04:24:38.015286', 'step': 9603, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:38.045869', 'step': 9603, 'epoch': 2} {'type': 'loss', 'content': 0.11967603862285614, 'timestamp': '2025-10-01 04:24:38.069650', 'step': 9604, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:38.101534', 'step': 9604, 'epoch': 2} {'type': 'loss', 'content': 0.09767235815525055, 'timestamp': '2025-10-01 04:24:38.103985', 'step': 9605, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:38.134885', 'step': 9605, 'epoch': 2} {'type': 'loss', 'content': 0.10770987719297409, 'timestamp': '2025-10-01 04:24:38.137010', 'step': 9606, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:38.167681', 'step': 9606, 'epoch': 2} {'type': 'loss', 'content': 0.11105930060148239, 'timestamp': '2025-10-01 04:24:38.170143', 'step': 9607, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:38.200018', 'step': 9607, 'epoch': 2} {'type': 'loss', 'content': 0.0951768085360527, 'timestamp': '2025-10-01 04:24:38.223587', 'step': 9608, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:38.253331', 'step': 9608, 'epoch': 2} {'type': 'loss', 'content': 0.09365420043468475, 'timestamp': '2025-10-01 04:24:38.255382', 'step': 9609, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:38.286506', 'step': 9609, 'epoch': 2} {'type': 'loss', 'content': 0.08067630231380463, 'timestamp': '2025-10-01 04:24:38.288675', 'step': 9610, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:38.319992', 'step': 9610, 'epoch': 2} {'type': 'loss', 'content': 0.15187303721904755, 'timestamp': '2025-10-01 04:24:38.323126', 'step': 9611, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:38.353840', 'step': 9611, 'epoch': 2} {'type': 'loss', 'content': 0.10741758346557617, 'timestamp': '2025-10-01 04:24:38.377376', 'step': 9612, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:24:38.414127', 'step': 9612, 'epoch': 2} {'type': 'loss', 'content': 0.05900576338171959, 'timestamp': '2025-10-01 04:24:38.416578', 'step': 9613, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:38.451920', 'step': 9613, 'epoch': 2} {'type': 'loss', 'content': 0.08480729162693024, 'timestamp': '2025-10-01 04:24:38.460076', 'step': 9614, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:38.490908', 'step': 9614, 'epoch': 2} {'type': 'loss', 'content': 0.11522805690765381, 'timestamp': '2025-10-01 04:24:38.493300', 'step': 9615, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:38.528600', 'step': 9615, 'epoch': 2} {'type': 'loss', 'content': 0.16028690338134766, 'timestamp': '2025-10-01 04:24:38.552750', 'step': 9616, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:38.583196', 'step': 9616, 'epoch': 2} {'type': 'loss', 'content': 0.12435895949602127, 'timestamp': '2025-10-01 04:24:38.585539', 'step': 9617, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:24:38.615687', 'step': 9617, 'epoch': 2} {'type': 'loss', 'content': 0.049267347902059555, 'timestamp': '2025-10-01 04:24:38.618698', 'step': 9618, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:38.655553', 'step': 9618, 'epoch': 2} {'type': 'loss', 'content': 0.14919285476207733, 'timestamp': '2025-10-01 04:24:38.657760', 'step': 9619, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:38.688475', 'step': 9619, 'epoch': 2} {'type': 'loss', 'content': 0.09914137423038483, 'timestamp': '2025-10-01 04:24:38.711916', 'step': 9620, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:38.741790', 'step': 9620, 'epoch': 2} {'type': 'loss', 'content': 0.09123196452856064, 'timestamp': '2025-10-01 04:24:38.743870', 'step': 9621, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:38.773581', 'step': 9621, 'epoch': 2} {'type': 'loss', 'content': 0.19483298063278198, 'timestamp': '2025-10-01 04:24:38.775829', 'step': 9622, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:38.807565', 'step': 9622, 'epoch': 2} {'type': 'loss', 'content': 0.15530720353126526, 'timestamp': '2025-10-01 04:24:38.814605', 'step': 9623, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:38.844543', 'step': 9623, 'epoch': 2} {'type': 'loss', 'content': 0.161647766828537, 'timestamp': '2025-10-01 04:24:38.868114', 'step': 9624, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:38.899144', 'step': 9624, 'epoch': 2} {'type': 'loss', 'content': 0.12906044721603394, 'timestamp': '2025-10-01 04:24:38.901133', 'step': 9625, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:38.931462', 'step': 9625, 'epoch': 2} {'type': 'loss', 'content': 0.08223239332437515, 'timestamp': '2025-10-01 04:24:38.936739', 'step': 9626, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:38.967183', 'step': 9626, 'epoch': 2} {'type': 'loss', 'content': 0.12235561013221741, 'timestamp': '2025-10-01 04:24:38.969325', 'step': 9627, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:39.000094', 'step': 9627, 'epoch': 2} {'type': 'loss', 'content': 0.2412043809890747, 'timestamp': '2025-10-01 04:24:39.023807', 'step': 9628, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:39.054186', 'step': 9628, 'epoch': 2} {'type': 'loss', 'content': 0.14180538058280945, 'timestamp': '2025-10-01 04:24:39.056187', 'step': 9629, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:24:39.086308', 'step': 9629, 'epoch': 2} {'type': 'loss', 'content': 0.13223029673099518, 'timestamp': '2025-10-01 04:24:39.089750', 'step': 9630, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:39.120095', 'step': 9630, 'epoch': 2} {'type': 'loss', 'content': 0.09621331840753555, 'timestamp': '2025-10-01 04:24:39.122085', 'step': 9631, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:39.153544', 'step': 9631, 'epoch': 2} {'type': 'loss', 'content': 0.21718323230743408, 'timestamp': '2025-10-01 04:24:39.177149', 'step': 9632, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:39.207605', 'step': 9632, 'epoch': 2} {'type': 'loss', 'content': 0.184149369597435, 'timestamp': '2025-10-01 04:24:39.210450', 'step': 9633, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:39.240437', 'step': 9633, 'epoch': 2} {'type': 'loss', 'content': 0.11479789763689041, 'timestamp': '2025-10-01 04:24:39.242711', 'step': 9634, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:39.272604', 'step': 9634, 'epoch': 2} {'type': 'loss', 'content': 0.15895149111747742, 'timestamp': '2025-10-01 04:24:39.274811', 'step': 9635, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:24:39.305211', 'step': 9635, 'epoch': 2} {'type': 'loss', 'content': 0.12033415585756302, 'timestamp': '2025-10-01 04:24:39.329289', 'step': 9636, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:39.360618', 'step': 9636, 'epoch': 2} {'type': 'loss', 'content': 0.12580522894859314, 'timestamp': '2025-10-01 04:24:39.363251', 'step': 9637, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:39.393984', 'step': 9637, 'epoch': 2} {'type': 'loss', 'content': 0.07823335379362106, 'timestamp': '2025-10-01 04:24:39.396854', 'step': 9638, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:39.428357', 'step': 9638, 'epoch': 2} {'type': 'loss', 'content': 0.07187392562627792, 'timestamp': '2025-10-01 04:24:39.430938', 'step': 9639, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:39.461184', 'step': 9639, 'epoch': 2} {'type': 'loss', 'content': 0.16979612410068512, 'timestamp': '2025-10-01 04:24:39.484833', 'step': 9640, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:39.514998', 'step': 9640, 'epoch': 2} {'type': 'loss', 'content': 0.09861964732408524, 'timestamp': '2025-10-01 04:24:39.517152', 'step': 9641, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:39.547449', 'step': 9641, 'epoch': 2} {'type': 'loss', 'content': 0.2507782578468323, 'timestamp': '2025-10-01 04:24:39.549597', 'step': 9642, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:39.580029', 'step': 9642, 'epoch': 2} {'type': 'loss', 'content': 0.05981956049799919, 'timestamp': '2025-10-01 04:24:39.582069', 'step': 9643, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:39.637458', 'step': 9643, 'epoch': 2} {'type': 'loss', 'content': 0.09674990922212601, 'timestamp': '2025-10-01 04:24:39.661470', 'step': 9644, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:39.700275', 'step': 9644, 'epoch': 2} {'type': 'loss', 'content': 0.21653755009174347, 'timestamp': '2025-10-01 04:24:39.702595', 'step': 9645, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:39.736228', 'step': 9645, 'epoch': 2} {'type': 'loss', 'content': 0.13859768211841583, 'timestamp': '2025-10-01 04:24:39.738461', 'step': 9646, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:39.776506', 'step': 9646, 'epoch': 2} {'type': 'loss', 'content': 0.10230530798435211, 'timestamp': '2025-10-01 04:24:39.779262', 'step': 9647, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:39.827047', 'step': 9647, 'epoch': 2} {'type': 'loss', 'content': 0.2380959689617157, 'timestamp': '2025-10-01 04:24:39.850703', 'step': 9648, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:39.895372', 'step': 9648, 'epoch': 2} {'type': 'loss', 'content': 0.0676504597067833, 'timestamp': '2025-10-01 04:24:39.897561', 'step': 9649, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:39.929508', 'step': 9649, 'epoch': 2} {'type': 'loss', 'content': 0.126803919672966, 'timestamp': '2025-10-01 04:24:39.931836', 'step': 9650, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:24:39.963874', 'step': 9650, 'epoch': 2} {'type': 'loss', 'content': 0.07253313809633255, 'timestamp': '2025-10-01 04:24:39.966304', 'step': 9651, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:40.026178', 'step': 9651, 'epoch': 2} {'type': 'loss', 'content': 0.1002473309636116, 'timestamp': '2025-10-01 04:24:40.049699', 'step': 9652, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:40.081263', 'step': 9652, 'epoch': 2} {'type': 'loss', 'content': 0.11178860813379288, 'timestamp': '2025-10-01 04:24:40.083481', 'step': 9653, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:40.123254', 'step': 9653, 'epoch': 2} {'type': 'loss', 'content': 0.1119670495390892, 'timestamp': '2025-10-01 04:24:40.125575', 'step': 9654, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:40.172417', 'step': 9654, 'epoch': 2} {'type': 'loss', 'content': 0.15606869757175446, 'timestamp': '2025-10-01 04:24:40.174576', 'step': 9655, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:40.213413', 'step': 9655, 'epoch': 2} {'type': 'loss', 'content': 0.11121199280023575, 'timestamp': '2025-10-01 04:24:40.237244', 'step': 9656, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:40.268784', 'step': 9656, 'epoch': 2} {'type': 'loss', 'content': 0.11532845348119736, 'timestamp': '2025-10-01 04:24:40.271049', 'step': 9657, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:40.302021', 'step': 9657, 'epoch': 2} {'type': 'loss', 'content': 0.04667627066373825, 'timestamp': '2025-10-01 04:24:40.304378', 'step': 9658, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:40.337089', 'step': 9658, 'epoch': 2} {'type': 'loss', 'content': 0.13615505397319794, 'timestamp': '2025-10-01 04:24:40.339544', 'step': 9659, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:24:40.369823', 'step': 9659, 'epoch': 2} {'type': 'loss', 'content': 0.11067686975002289, 'timestamp': '2025-10-01 04:24:40.393620', 'step': 9660, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:40.425198', 'step': 9660, 'epoch': 2} {'type': 'loss', 'content': 0.09371934831142426, 'timestamp': '2025-10-01 04:24:40.433243', 'step': 9661, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-10-01 04:24:40.463941', 'step': 9661, 'epoch': 2} {'type': 'loss', 'content': 0.1288890242576599, 'timestamp': '2025-10-01 04:24:40.468755', 'step': 9662, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:40.503974', 'step': 9662, 'epoch': 2} {'type': 'loss', 'content': 0.1018371507525444, 'timestamp': '2025-10-01 04:24:40.506772', 'step': 9663, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:40.538001', 'step': 9663, 'epoch': 2} {'type': 'loss', 'content': 0.06325475871562958, 'timestamp': '2025-10-01 04:24:40.561936', 'step': 9664, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:40.593178', 'step': 9664, 'epoch': 2} {'type': 'loss', 'content': 0.09335000813007355, 'timestamp': '2025-10-01 04:24:40.595302', 'step': 9665, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:40.626295', 'step': 9665, 'epoch': 2} {'type': 'loss', 'content': 0.19733662903308868, 'timestamp': '2025-10-01 04:24:40.630730', 'step': 9666, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:40.661503', 'step': 9666, 'epoch': 2} {'type': 'loss', 'content': 0.11084894835948944, 'timestamp': '2025-10-01 04:24:40.663627', 'step': 9667, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:24:40.694128', 'step': 9667, 'epoch': 2} {'type': 'loss', 'content': 0.12706045806407928, 'timestamp': '2025-10-01 04:24:40.718057', 'step': 9668, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:40.748691', 'step': 9668, 'epoch': 2} {'type': 'loss', 'content': 0.08920861780643463, 'timestamp': '2025-10-01 04:24:40.750975', 'step': 9669, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:40.782255', 'step': 9669, 'epoch': 2} {'type': 'loss', 'content': 0.18092544376850128, 'timestamp': '2025-10-01 04:24:40.784634', 'step': 9670, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:40.818285', 'step': 9670, 'epoch': 2} {'type': 'loss', 'content': 0.21180807054042816, 'timestamp': '2025-10-01 04:24:40.820574', 'step': 9671, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:40.851782', 'step': 9671, 'epoch': 2} {'type': 'loss', 'content': 0.2722601890563965, 'timestamp': '2025-10-01 04:24:40.875619', 'step': 9672, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:40.906098', 'step': 9672, 'epoch': 2} {'type': 'loss', 'content': 0.05701205134391785, 'timestamp': '2025-10-01 04:24:40.913115', 'step': 9673, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:40.943470', 'step': 9673, 'epoch': 2} {'type': 'loss', 'content': 0.11500401794910431, 'timestamp': '2025-10-01 04:24:40.945469', 'step': 9674, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:24:40.975910', 'step': 9674, 'epoch': 2} {'type': 'loss', 'content': 0.12673619389533997, 'timestamp': '2025-10-01 04:24:40.978368', 'step': 9675, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:41.010866', 'step': 9675, 'epoch': 2} {'type': 'loss', 'content': 0.18980388343334198, 'timestamp': '2025-10-01 04:24:41.034387', 'step': 9676, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:41.064814', 'step': 9676, 'epoch': 2} {'type': 'loss', 'content': 0.04437382146716118, 'timestamp': '2025-10-01 04:24:41.066803', 'step': 9677, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:41.098041', 'step': 9677, 'epoch': 2} {'type': 'loss', 'content': 0.19604864716529846, 'timestamp': '2025-10-01 04:24:41.100147', 'step': 9678, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:41.130436', 'step': 9678, 'epoch': 2} {'type': 'loss', 'content': 0.10210265964269638, 'timestamp': '2025-10-01 04:24:41.132495', 'step': 9679, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:24:41.162999', 'step': 9679, 'epoch': 2} {'type': 'loss', 'content': 0.11168146878480911, 'timestamp': '2025-10-01 04:24:41.188172', 'step': 9680, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:41.219576', 'step': 9680, 'epoch': 2} {'type': 'loss', 'content': 0.06366295367479324, 'timestamp': '2025-10-01 04:24:41.221915', 'step': 9681, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:41.255684', 'step': 9681, 'epoch': 2} {'type': 'loss', 'content': 0.062102384865283966, 'timestamp': '2025-10-01 04:24:41.257713', 'step': 9682, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:41.288837', 'step': 9682, 'epoch': 2} {'type': 'loss', 'content': 0.18110638856887817, 'timestamp': '2025-10-01 04:24:41.290947', 'step': 9683, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:41.321608', 'step': 9683, 'epoch': 2} {'type': 'loss', 'content': 0.16537177562713623, 'timestamp': '2025-10-01 04:24:41.345279', 'step': 9684, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:41.376228', 'step': 9684, 'epoch': 2} {'type': 'loss', 'content': 0.10804426670074463, 'timestamp': '2025-10-01 04:24:41.378193', 'step': 9685, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:41.412593', 'step': 9685, 'epoch': 2} {'type': 'loss', 'content': 0.15820012986660004, 'timestamp': '2025-10-01 04:24:41.414773', 'step': 9686, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:41.445761', 'step': 9686, 'epoch': 2} {'type': 'loss', 'content': 0.11620669066905975, 'timestamp': '2025-10-01 04:24:41.447853', 'step': 9687, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:24:41.482828', 'step': 9687, 'epoch': 2} {'type': 'loss', 'content': 0.11137557029724121, 'timestamp': '2025-10-01 04:24:41.506319', 'step': 9688, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:24:41.537372', 'step': 9688, 'epoch': 2} {'type': 'loss', 'content': 0.1016046553850174, 'timestamp': '2025-10-01 04:24:41.539437', 'step': 9689, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:41.570088', 'step': 9689, 'epoch': 2} {'type': 'loss', 'content': 0.09407893568277359, 'timestamp': '2025-10-01 04:24:41.572087', 'step': 9690, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:41.602286', 'step': 9690, 'epoch': 2} {'type': 'loss', 'content': 0.0985729992389679, 'timestamp': '2025-10-01 04:24:41.604750', 'step': 9691, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:41.635614', 'step': 9691, 'epoch': 2} {'type': 'loss', 'content': 0.09758415818214417, 'timestamp': '2025-10-01 04:24:41.659186', 'step': 9692, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:41.690468', 'step': 9692, 'epoch': 2} {'type': 'loss', 'content': 0.06560228765010834, 'timestamp': '2025-10-01 04:24:41.692632', 'step': 9693, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:41.723516', 'step': 9693, 'epoch': 2} {'type': 'loss', 'content': 0.1399441808462143, 'timestamp': '2025-10-01 04:24:41.725499', 'step': 9694, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:41.756182', 'step': 9694, 'epoch': 2} {'type': 'loss', 'content': 0.12915687263011932, 'timestamp': '2025-10-01 04:24:41.764256', 'step': 9695, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:41.795569', 'step': 9695, 'epoch': 2} {'type': 'loss', 'content': 0.09437673538923264, 'timestamp': '2025-10-01 04:24:41.819068', 'step': 9696, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:41.848702', 'step': 9696, 'epoch': 2} {'type': 'loss', 'content': 0.14173997938632965, 'timestamp': '2025-10-01 04:24:41.850734', 'step': 9697, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:41.881090', 'step': 9697, 'epoch': 2} {'type': 'loss', 'content': 0.13242311775684357, 'timestamp': '2025-10-01 04:24:41.883163', 'step': 9698, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:41.916230', 'step': 9698, 'epoch': 2} {'type': 'loss', 'content': 0.10155149549245834, 'timestamp': '2025-10-01 04:24:41.918354', 'step': 9699, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:41.953357', 'step': 9699, 'epoch': 2} {'type': 'loss', 'content': 0.06259704381227493, 'timestamp': '2025-10-01 04:24:41.977022', 'step': 9700, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:42.009509', 'step': 9700, 'epoch': 2} {'type': 'loss', 'content': 0.14005382359027863, 'timestamp': '2025-10-01 04:24:42.011570', 'step': 9701, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:42.043116', 'step': 9701, 'epoch': 2} {'type': 'loss', 'content': 0.13153433799743652, 'timestamp': '2025-10-01 04:24:42.045147', 'step': 9702, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:24:42.077222', 'step': 9702, 'epoch': 2} {'type': 'loss', 'content': 0.18164493143558502, 'timestamp': '2025-10-01 04:24:42.079321', 'step': 9703, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:24:42.111016', 'step': 9703, 'epoch': 2} {'type': 'loss', 'content': 0.05245305597782135, 'timestamp': '2025-10-01 04:24:42.134537', 'step': 9704, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:42.166026', 'step': 9704, 'epoch': 2} {'type': 'loss', 'content': 0.1800273060798645, 'timestamp': '2025-10-01 04:24:42.174510', 'step': 9705, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:24:42.210740', 'step': 9705, 'epoch': 2} {'type': 'loss', 'content': 0.11787780374288559, 'timestamp': '2025-10-01 04:24:42.213676', 'step': 9706, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:24:42.245407', 'step': 9706, 'epoch': 2} {'type': 'loss', 'content': 0.09748654812574387, 'timestamp': '2025-10-01 04:24:42.247892', 'step': 9707, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:24:42.279621', 'step': 9707, 'epoch': 2} {'type': 'loss', 'content': 0.09430975466966629, 'timestamp': '2025-10-01 04:24:42.303188', 'step': 9708, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:42.335102', 'step': 9708, 'epoch': 2} {'type': 'loss', 'content': 0.07030370831489563, 'timestamp': '2025-10-01 04:24:42.337321', 'step': 9709, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:24:42.367972', 'step': 9709, 'epoch': 2} {'type': 'loss', 'content': 0.14266552031040192, 'timestamp': '2025-10-01 04:24:42.370409', 'step': 9710, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:24:42.402384', 'step': 9710, 'epoch': 2} {'type': 'loss', 'content': 0.06456083059310913, 'timestamp': '2025-10-01 04:24:42.405187', 'step': 9711, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:42.437393', 'step': 9711, 'epoch': 2} {'type': 'loss', 'content': 0.11908545345067978, 'timestamp': '2025-10-01 04:24:42.460891', 'step': 9712, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:42.491166', 'step': 9712, 'epoch': 2} {'type': 'loss', 'content': 0.10216911882162094, 'timestamp': '2025-10-01 04:24:42.493108', 'step': 9713, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:42.523270', 'step': 9713, 'epoch': 2} {'type': 'loss', 'content': 0.12641610205173492, 'timestamp': '2025-10-01 04:24:42.525406', 'step': 9714, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:24:42.556219', 'step': 9714, 'epoch': 2} {'type': 'loss', 'content': 0.104603111743927, 'timestamp': '2025-10-01 04:24:42.560577', 'step': 9715, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:42.592416', 'step': 9715, 'epoch': 2} {'type': 'loss', 'content': 0.08040371537208557, 'timestamp': '2025-10-01 04:24:42.615916', 'step': 9716, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:42.645894', 'step': 9716, 'epoch': 2} {'type': 'loss', 'content': 0.1598639339208603, 'timestamp': '2025-10-01 04:24:42.647974', 'step': 9717, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:42.680264', 'step': 9717, 'epoch': 2} {'type': 'loss', 'content': 0.05961061269044876, 'timestamp': '2025-10-01 04:24:42.682399', 'step': 9718, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:42.715654', 'step': 9718, 'epoch': 2} {'type': 'loss', 'content': 0.17391128838062286, 'timestamp': '2025-10-01 04:24:42.717935', 'step': 9719, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:42.750337', 'step': 9719, 'epoch': 2} {'type': 'loss', 'content': 0.05891704559326172, 'timestamp': '2025-10-01 04:24:42.774124', 'step': 9720, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:42.807527', 'step': 9720, 'epoch': 2} {'type': 'loss', 'content': 0.14820779860019684, 'timestamp': '2025-10-01 04:24:42.809832', 'step': 9721, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:42.841266', 'step': 9721, 'epoch': 2} {'type': 'loss', 'content': 0.15291516482830048, 'timestamp': '2025-10-01 04:24:42.843367', 'step': 9722, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:42.875699', 'step': 9722, 'epoch': 2} {'type': 'loss', 'content': 0.07429562509059906, 'timestamp': '2025-10-01 04:24:42.877759', 'step': 9723, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:42.909695', 'step': 9723, 'epoch': 2} {'type': 'loss', 'content': 0.10016226768493652, 'timestamp': '2025-10-01 04:24:42.933524', 'step': 9724, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:42.966082', 'step': 9724, 'epoch': 2} {'type': 'loss', 'content': 0.10443967580795288, 'timestamp': '2025-10-01 04:24:42.983638', 'step': 9725, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:43.016525', 'step': 9725, 'epoch': 2} {'type': 'loss', 'content': 0.13704003393650055, 'timestamp': '2025-10-01 04:24:43.018533', 'step': 9726, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:43.053142', 'step': 9726, 'epoch': 2} {'type': 'loss', 'content': 0.17989759147167206, 'timestamp': '2025-10-01 04:24:43.055334', 'step': 9727, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:43.096126', 'step': 9727, 'epoch': 2} {'type': 'loss', 'content': 0.044123925268650055, 'timestamp': '2025-10-01 04:24:43.119676', 'step': 9728, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:43.151368', 'step': 9728, 'epoch': 2} {'type': 'loss', 'content': 0.17773272097110748, 'timestamp': '2025-10-01 04:24:43.154068', 'step': 9729, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:43.185896', 'step': 9729, 'epoch': 2} {'type': 'loss', 'content': 0.0669110044836998, 'timestamp': '2025-10-01 04:24:43.187996', 'step': 9730, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:43.218415', 'step': 9730, 'epoch': 2} {'type': 'loss', 'content': 0.12240979820489883, 'timestamp': '2025-10-01 04:24:43.220509', 'step': 9731, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:43.251386', 'step': 9731, 'epoch': 2} {'type': 'loss', 'content': 0.10340946167707443, 'timestamp': '2025-10-01 04:24:43.274902', 'step': 9732, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:43.305343', 'step': 9732, 'epoch': 2} {'type': 'loss', 'content': 0.1254531890153885, 'timestamp': '2025-10-01 04:24:43.307525', 'step': 9733, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:43.339361', 'step': 9733, 'epoch': 2} {'type': 'loss', 'content': 0.07748330384492874, 'timestamp': '2025-10-01 04:24:43.341706', 'step': 9734, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:43.382651', 'step': 9734, 'epoch': 2} {'type': 'loss', 'content': 0.13117335736751556, 'timestamp': '2025-10-01 04:24:43.384908', 'step': 9735, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:43.419428', 'step': 9735, 'epoch': 2} {'type': 'loss', 'content': 0.1512448936700821, 'timestamp': '2025-10-01 04:24:43.443364', 'step': 9736, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:43.474514', 'step': 9736, 'epoch': 2} {'type': 'loss', 'content': 0.12219934165477753, 'timestamp': '2025-10-01 04:24:43.476744', 'step': 9737, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:43.507141', 'step': 9737, 'epoch': 2} {'type': 'loss', 'content': 0.12895143032073975, 'timestamp': '2025-10-01 04:24:43.509223', 'step': 9738, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:43.541037', 'step': 9738, 'epoch': 2} {'type': 'loss', 'content': 0.17162993550300598, 'timestamp': '2025-10-01 04:24:43.543576', 'step': 9739, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:43.574304', 'step': 9739, 'epoch': 2} {'type': 'loss', 'content': 0.15613645315170288, 'timestamp': '2025-10-01 04:24:43.597750', 'step': 9740, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:43.629320', 'step': 9740, 'epoch': 2} {'type': 'loss', 'content': 0.1750773787498474, 'timestamp': '2025-10-01 04:24:43.631913', 'step': 9741, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:43.663068', 'step': 9741, 'epoch': 2} {'type': 'loss', 'content': 0.1141362190246582, 'timestamp': '2025-10-01 04:24:43.665090', 'step': 9742, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:43.696970', 'step': 9742, 'epoch': 2} {'type': 'loss', 'content': 0.16910478472709656, 'timestamp': '2025-10-01 04:24:43.699164', 'step': 9743, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:43.730169', 'step': 9743, 'epoch': 2} {'type': 'loss', 'content': 0.10937449336051941, 'timestamp': '2025-10-01 04:24:43.753758', 'step': 9744, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:43.784892', 'step': 9744, 'epoch': 2} {'type': 'loss', 'content': 0.0802091583609581, 'timestamp': '2025-10-01 04:24:43.787069', 'step': 9745, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:24:43.818756', 'step': 9745, 'epoch': 2} {'type': 'loss', 'content': 0.06432017683982849, 'timestamp': '2025-10-01 04:24:43.820805', 'step': 9746, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:43.852707', 'step': 9746, 'epoch': 2} {'type': 'loss', 'content': 0.06746384501457214, 'timestamp': '2025-10-01 04:24:43.854849', 'step': 9747, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:43.885905', 'step': 9747, 'epoch': 2} {'type': 'loss', 'content': 0.15194860100746155, 'timestamp': '2025-10-01 04:24:43.909454', 'step': 9748, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:24:43.940304', 'step': 9748, 'epoch': 2} {'type': 'loss', 'content': 0.1637885570526123, 'timestamp': '2025-10-01 04:24:43.942475', 'step': 9749, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:24:43.973786', 'step': 9749, 'epoch': 2} {'type': 'loss', 'content': 0.12834641337394714, 'timestamp': '2025-10-01 04:24:43.976122', 'step': 9750, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:44.007112', 'step': 9750, 'epoch': 2} {'type': 'loss', 'content': 0.1942514330148697, 'timestamp': '2025-10-01 04:24:44.009054', 'step': 9751, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:44.039460', 'step': 9751, 'epoch': 2} {'type': 'loss', 'content': 0.12441650778055191, 'timestamp': '2025-10-01 04:24:44.068426', 'step': 9752, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:44.113284', 'step': 9752, 'epoch': 2} {'type': 'loss', 'content': 0.10951060056686401, 'timestamp': '2025-10-01 04:24:44.128721', 'step': 9753, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:44.169281', 'step': 9753, 'epoch': 2} {'type': 'loss', 'content': 0.10722154378890991, 'timestamp': '2025-10-01 04:24:44.177148', 'step': 9754, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:44.221264', 'step': 9754, 'epoch': 2} {'type': 'loss', 'content': 0.21428456902503967, 'timestamp': '2025-10-01 04:24:44.227469', 'step': 9755, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:44.270128', 'step': 9755, 'epoch': 2} {'type': 'loss', 'content': 0.07840243726968765, 'timestamp': '2025-10-01 04:24:44.304393', 'step': 9756, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:44.342107', 'step': 9756, 'epoch': 2} {'type': 'loss', 'content': 0.11213598400354385, 'timestamp': '2025-10-01 04:24:44.351903', 'step': 9757, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:24:44.391912', 'step': 9757, 'epoch': 2} {'type': 'loss', 'content': 0.046023376286029816, 'timestamp': '2025-10-01 04:24:44.400573', 'step': 9758, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:44.443373', 'step': 9758, 'epoch': 2} {'type': 'loss', 'content': 0.114109106361866, 'timestamp': '2025-10-01 04:24:44.450234', 'step': 9759, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:44.487573', 'step': 9759, 'epoch': 2} {'type': 'loss', 'content': 0.13101768493652344, 'timestamp': '2025-10-01 04:24:44.526180', 'step': 9760, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:44.595506', 'step': 9760, 'epoch': 2} {'type': 'loss', 'content': 0.08700200915336609, 'timestamp': '2025-10-01 04:24:44.619319', 'step': 9761, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:44.683594', 'step': 9761, 'epoch': 2} {'type': 'loss', 'content': 0.07941751927137375, 'timestamp': '2025-10-01 04:24:44.698535', 'step': 9762, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:44.753044', 'step': 9762, 'epoch': 2} {'type': 'loss', 'content': 0.17469961941242218, 'timestamp': '2025-10-01 04:24:44.771293', 'step': 9763, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:44.827328', 'step': 9763, 'epoch': 2} {'type': 'loss', 'content': 0.10691700130701065, 'timestamp': '2025-10-01 04:24:44.856451', 'step': 9764, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:44.910432', 'step': 9764, 'epoch': 2} {'type': 'loss', 'content': 0.12942947447299957, 'timestamp': '2025-10-01 04:24:44.936894', 'step': 9765, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:44.983588', 'step': 9765, 'epoch': 2} {'type': 'loss', 'content': 0.11572753638029099, 'timestamp': '2025-10-01 04:24:44.991205', 'step': 9766, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:45.047675', 'step': 9766, 'epoch': 2} {'type': 'loss', 'content': 0.05845991522073746, 'timestamp': '2025-10-01 04:24:45.066760', 'step': 9767, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:45.124634', 'step': 9767, 'epoch': 2} {'type': 'loss', 'content': 0.21544697880744934, 'timestamp': '2025-10-01 04:24:45.156179', 'step': 9768, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:24:45.207960', 'step': 9768, 'epoch': 2} {'type': 'loss', 'content': 0.16588163375854492, 'timestamp': '2025-10-01 04:24:45.229870', 'step': 9769, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:45.280084', 'step': 9769, 'epoch': 2} {'type': 'loss', 'content': 0.15777015686035156, 'timestamp': '2025-10-01 04:24:45.287107', 'step': 9770, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:45.336861', 'step': 9770, 'epoch': 2} {'type': 'loss', 'content': 0.06278765946626663, 'timestamp': '2025-10-01 04:24:45.339481', 'step': 9771, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:45.372335', 'step': 9771, 'epoch': 2} {'type': 'loss', 'content': 0.07486744225025177, 'timestamp': '2025-10-01 04:24:45.395663', 'step': 9772, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:45.430704', 'step': 9772, 'epoch': 2} {'type': 'loss', 'content': 0.10090439021587372, 'timestamp': '2025-10-01 04:24:45.432750', 'step': 9773, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:45.464471', 'step': 9773, 'epoch': 2} {'type': 'loss', 'content': 0.09579619765281677, 'timestamp': '2025-10-01 04:24:45.467425', 'step': 9774, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:24:45.511715', 'step': 9774, 'epoch': 2} {'type': 'loss', 'content': 0.08371096104383469, 'timestamp': '2025-10-01 04:24:45.513988', 'step': 9775, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:24:45.550588', 'step': 9775, 'epoch': 2} {'type': 'loss', 'content': 0.07534770667552948, 'timestamp': '2025-10-01 04:24:45.582639', 'step': 9776, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:24:45.620986', 'step': 9776, 'epoch': 2} {'type': 'loss', 'content': 0.07436821609735489, 'timestamp': '2025-10-01 04:24:45.623066', 'step': 9777, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:45.669547', 'step': 9777, 'epoch': 2} {'type': 'loss', 'content': 0.08812545239925385, 'timestamp': '2025-10-01 04:24:45.671690', 'step': 9778, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:45.704864', 'step': 9778, 'epoch': 2} {'type': 'loss', 'content': 0.17106905579566956, 'timestamp': '2025-10-01 04:24:45.707014', 'step': 9779, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-10-01 04:24:45.747920', 'step': 9779, 'epoch': 2} {'type': 'loss', 'content': 0.07231604307889938, 'timestamp': '2025-10-01 04:24:45.773531', 'step': 9780, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:45.803544', 'step': 9780, 'epoch': 2} {'type': 'loss', 'content': 0.1527756005525589, 'timestamp': '2025-10-01 04:24:45.805583', 'step': 9781, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:45.835824', 'step': 9781, 'epoch': 2} {'type': 'loss', 'content': 0.13330543041229248, 'timestamp': '2025-10-01 04:24:45.837872', 'step': 9782, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:45.868844', 'step': 9782, 'epoch': 2} {'type': 'loss', 'content': 0.09951134771108627, 'timestamp': '2025-10-01 04:24:45.870711', 'step': 9783, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:45.900513', 'step': 9783, 'epoch': 2} {'type': 'loss', 'content': 0.17224852740764618, 'timestamp': '2025-10-01 04:24:45.924018', 'step': 9784, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:24:45.955615', 'step': 9784, 'epoch': 2} {'type': 'loss', 'content': 0.17708201706409454, 'timestamp': '2025-10-01 04:24:45.957661', 'step': 9785, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:45.988254', 'step': 9785, 'epoch': 2} {'type': 'loss', 'content': 0.07253646105527878, 'timestamp': '2025-10-01 04:24:45.990845', 'step': 9786, 'epoch': 2} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 949202279808}], 'timestamp': '2025-10-01 04:24:57.114937', 'step': 9786, 'epoch': 2} {'type': 'pplx', 'content': 11219.556981164335, 'timestamp': '2025-10-01 04:24:57.117856', 'step': 9786, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:57.156311', 'step': 9786, 'epoch': 2} {'type': 'loss', 'content': 0.1170974150300026, 'timestamp': '2025-10-01 04:24:57.158526', 'step': 9787, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:24:57.197051', 'step': 9787, 'epoch': 2} {'type': 'loss', 'content': 0.06462392956018448, 'timestamp': '2025-10-01 04:24:57.221001', 'step': 9788, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:57.255801', 'step': 9788, 'epoch': 2} {'type': 'loss', 'content': 0.12146016955375671, 'timestamp': '2025-10-01 04:24:57.257913', 'step': 9789, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:57.297075', 'step': 9789, 'epoch': 2} {'type': 'loss', 'content': 0.05298404023051262, 'timestamp': '2025-10-01 04:24:57.308517', 'step': 9790, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:57.341114', 'step': 9790, 'epoch': 2} {'type': 'loss', 'content': 0.092450350522995, 'timestamp': '2025-10-01 04:24:57.343342', 'step': 9791, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:57.376051', 'step': 9791, 'epoch': 2} {'type': 'loss', 'content': 0.23014310002326965, 'timestamp': '2025-10-01 04:24:57.399723', 'step': 9792, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:57.436185', 'step': 9792, 'epoch': 2} {'type': 'loss', 'content': 0.08765752613544464, 'timestamp': '2025-10-01 04:24:57.438561', 'step': 9793, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:24:57.484523', 'step': 9793, 'epoch': 2} {'type': 'loss', 'content': 0.12789668142795563, 'timestamp': '2025-10-01 04:24:57.486665', 'step': 9794, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:57.518198', 'step': 9794, 'epoch': 2} {'type': 'loss', 'content': 0.1107448861002922, 'timestamp': '2025-10-01 04:24:57.520823', 'step': 9795, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:57.561040', 'step': 9795, 'epoch': 2} {'type': 'loss', 'content': 0.11004218459129333, 'timestamp': '2025-10-01 04:24:57.584769', 'step': 9796, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:57.618405', 'step': 9796, 'epoch': 2} {'type': 'loss', 'content': 0.11681844294071198, 'timestamp': '2025-10-01 04:24:57.620578', 'step': 9797, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:24:57.674236', 'step': 9797, 'epoch': 2} {'type': 'loss', 'content': 0.0722632184624672, 'timestamp': '2025-10-01 04:24:57.676599', 'step': 9798, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:57.711483', 'step': 9798, 'epoch': 2} {'type': 'loss', 'content': 0.08090021461248398, 'timestamp': '2025-10-01 04:24:57.713791', 'step': 9799, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:57.747691', 'step': 9799, 'epoch': 2} {'type': 'loss', 'content': 0.13578882813453674, 'timestamp': '2025-10-01 04:24:57.771649', 'step': 9800, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:57.806634', 'step': 9800, 'epoch': 2} {'type': 'loss', 'content': 0.109470434486866, 'timestamp': '2025-10-01 04:24:57.808758', 'step': 9801, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:57.852727', 'step': 9801, 'epoch': 2} {'type': 'loss', 'content': 0.10193736851215363, 'timestamp': '2025-10-01 04:24:57.854865', 'step': 9802, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:57.887380', 'step': 9802, 'epoch': 2} {'type': 'loss', 'content': 0.1254122406244278, 'timestamp': '2025-10-01 04:24:57.897809', 'step': 9803, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:57.939536', 'step': 9803, 'epoch': 2} {'type': 'loss', 'content': 0.0925016850233078, 'timestamp': '2025-10-01 04:24:57.963203', 'step': 9804, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:57.996983', 'step': 9804, 'epoch': 2} {'type': 'loss', 'content': 0.137080118060112, 'timestamp': '2025-10-01 04:24:57.999027', 'step': 9805, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:58.040662', 'step': 9805, 'epoch': 2} {'type': 'loss', 'content': 0.10576025396585464, 'timestamp': '2025-10-01 04:24:58.042824', 'step': 9806, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:58.078885', 'step': 9806, 'epoch': 2} {'type': 'loss', 'content': 0.049836836755275726, 'timestamp': '2025-10-01 04:24:58.080961', 'step': 9807, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:58.116933', 'step': 9807, 'epoch': 2} {'type': 'loss', 'content': 0.09807967394590378, 'timestamp': '2025-10-01 04:24:58.140323', 'step': 9808, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:58.172889', 'step': 9808, 'epoch': 2} {'type': 'loss', 'content': 0.1587446630001068, 'timestamp': '2025-10-01 04:24:58.175867', 'step': 9809, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:58.227555', 'step': 9809, 'epoch': 2} {'type': 'loss', 'content': 0.08359755575656891, 'timestamp': '2025-10-01 04:24:58.229667', 'step': 9810, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:58.269216', 'step': 9810, 'epoch': 2} {'type': 'loss', 'content': 0.17652666568756104, 'timestamp': '2025-10-01 04:24:58.273200', 'step': 9811, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:58.317160', 'step': 9811, 'epoch': 2} {'type': 'loss', 'content': 0.059683363884687424, 'timestamp': '2025-10-01 04:24:58.341388', 'step': 9812, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:58.381691', 'step': 9812, 'epoch': 2} {'type': 'loss', 'content': 0.06222917139530182, 'timestamp': '2025-10-01 04:24:58.391911', 'step': 9813, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:24:58.430710', 'step': 9813, 'epoch': 2} {'type': 'loss', 'content': 0.04471134766936302, 'timestamp': '2025-10-01 04:24:58.432919', 'step': 9814, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:58.467334', 'step': 9814, 'epoch': 2} {'type': 'loss', 'content': 0.09765861928462982, 'timestamp': '2025-10-01 04:24:58.481334', 'step': 9815, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:58.513964', 'step': 9815, 'epoch': 2} {'type': 'loss', 'content': 0.0939422994852066, 'timestamp': '2025-10-01 04:24:58.537611', 'step': 9816, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:58.569688', 'step': 9816, 'epoch': 2} {'type': 'loss', 'content': 0.11728860437870026, 'timestamp': '2025-10-01 04:24:58.571848', 'step': 9817, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:58.614219', 'step': 9817, 'epoch': 2} {'type': 'loss', 'content': 0.1000199243426323, 'timestamp': '2025-10-01 04:24:58.616435', 'step': 9818, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:24:58.657566', 'step': 9818, 'epoch': 2} {'type': 'loss', 'content': 0.1169256940484047, 'timestamp': '2025-10-01 04:24:58.659640', 'step': 9819, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:58.698229', 'step': 9819, 'epoch': 2} {'type': 'loss', 'content': 0.13163906335830688, 'timestamp': '2025-10-01 04:24:58.723336', 'step': 9820, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:58.780688', 'step': 9820, 'epoch': 2} {'type': 'loss', 'content': 0.1224619522690773, 'timestamp': '2025-10-01 04:24:58.782871', 'step': 9821, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:58.818560', 'step': 9821, 'epoch': 2} {'type': 'loss', 'content': 0.12555043399333954, 'timestamp': '2025-10-01 04:24:58.821208', 'step': 9822, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:58.859865', 'step': 9822, 'epoch': 2} {'type': 'loss', 'content': 0.12070005387067795, 'timestamp': '2025-10-01 04:24:58.868820', 'step': 9823, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:24:58.904756', 'step': 9823, 'epoch': 2} {'type': 'loss', 'content': 0.2618117034435272, 'timestamp': '2025-10-01 04:24:58.929963', 'step': 9824, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:58.966876', 'step': 9824, 'epoch': 2} {'type': 'loss', 'content': 0.12064594030380249, 'timestamp': '2025-10-01 04:24:58.969626', 'step': 9825, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:59.017264', 'step': 9825, 'epoch': 2} {'type': 'loss', 'content': 0.17153871059417725, 'timestamp': '2025-10-01 04:24:59.031379', 'step': 9826, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:59.087289', 'step': 9826, 'epoch': 2} {'type': 'loss', 'content': 0.15015894174575806, 'timestamp': '2025-10-01 04:24:59.108797', 'step': 9827, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:59.165804', 'step': 9827, 'epoch': 2} {'type': 'loss', 'content': 0.07369963079690933, 'timestamp': '2025-10-01 04:24:59.204823', 'step': 9828, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:59.246615', 'step': 9828, 'epoch': 2} {'type': 'loss', 'content': 0.15182337164878845, 'timestamp': '2025-10-01 04:24:59.248885', 'step': 9829, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:59.286186', 'step': 9829, 'epoch': 2} {'type': 'loss', 'content': 0.06368962675333023, 'timestamp': '2025-10-01 04:24:59.291954', 'step': 9830, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:59.341535', 'step': 9830, 'epoch': 2} {'type': 'loss', 'content': 0.03958044573664665, 'timestamp': '2025-10-01 04:24:59.355868', 'step': 9831, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:59.390936', 'step': 9831, 'epoch': 2} {'type': 'loss', 'content': 0.07679349929094315, 'timestamp': '2025-10-01 04:24:59.415190', 'step': 9832, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:59.449877', 'step': 9832, 'epoch': 2} {'type': 'loss', 'content': 0.13991588354110718, 'timestamp': '2025-10-01 04:24:59.456530', 'step': 9833, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:59.501809', 'step': 9833, 'epoch': 2} {'type': 'loss', 'content': 0.12080352753400803, 'timestamp': '2025-10-01 04:24:59.509628', 'step': 9834, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:59.553723', 'step': 9834, 'epoch': 2} {'type': 'loss', 'content': 0.07144248485565186, 'timestamp': '2025-10-01 04:24:59.557245', 'step': 9835, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:24:59.602942', 'step': 9835, 'epoch': 2} {'type': 'loss', 'content': 0.1541152149438858, 'timestamp': '2025-10-01 04:24:59.627083', 'step': 9836, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:59.678396', 'step': 9836, 'epoch': 2} {'type': 'loss', 'content': 0.17087754607200623, 'timestamp': '2025-10-01 04:24:59.680577', 'step': 9837, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:59.718326', 'step': 9837, 'epoch': 2} {'type': 'loss', 'content': 0.11340392380952835, 'timestamp': '2025-10-01 04:24:59.722758', 'step': 9838, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:59.777038', 'step': 9838, 'epoch': 2} {'type': 'loss', 'content': 0.0923222079873085, 'timestamp': '2025-10-01 04:24:59.780343', 'step': 9839, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:24:59.831696', 'step': 9839, 'epoch': 2} {'type': 'loss', 'content': 0.07259884476661682, 'timestamp': '2025-10-01 04:24:59.858889', 'step': 9840, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:59.901202', 'step': 9840, 'epoch': 2} {'type': 'loss', 'content': 0.08538033813238144, 'timestamp': '2025-10-01 04:24:59.904313', 'step': 9841, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:24:59.947856', 'step': 9841, 'epoch': 2} {'type': 'loss', 'content': 0.14835533499717712, 'timestamp': '2025-10-01 04:24:59.953129', 'step': 9842, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:24:59.999688', 'step': 9842, 'epoch': 2} {'type': 'loss', 'content': 0.16997018456459045, 'timestamp': '2025-10-01 04:25:00.002024', 'step': 9843, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:25:00.041623', 'step': 9843, 'epoch': 2} {'type': 'loss', 'content': 0.1730310171842575, 'timestamp': '2025-10-01 04:25:00.066790', 'step': 9844, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:25:00.101621', 'step': 9844, 'epoch': 2} {'type': 'loss', 'content': 0.08972256630659103, 'timestamp': '2025-10-01 04:25:00.107256', 'step': 9845, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:25:00.144079', 'step': 9845, 'epoch': 2} {'type': 'loss', 'content': 0.13915976881980896, 'timestamp': '2025-10-01 04:25:00.148007', 'step': 9846, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:00.185397', 'step': 9846, 'epoch': 2} {'type': 'loss', 'content': 0.11731351166963577, 'timestamp': '2025-10-01 04:25:00.191293', 'step': 9847, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:25:00.229358', 'step': 9847, 'epoch': 2} {'type': 'loss', 'content': 0.09313566982746124, 'timestamp': '2025-10-01 04:25:00.258105', 'step': 9848, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:00.292481', 'step': 9848, 'epoch': 2} {'type': 'loss', 'content': 0.14205200970172882, 'timestamp': '2025-10-01 04:25:00.295459', 'step': 9849, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:00.329385', 'step': 9849, 'epoch': 2} {'type': 'loss', 'content': 0.14419521391391754, 'timestamp': '2025-10-01 04:25:00.331584', 'step': 9850, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:25:00.378949', 'step': 9850, 'epoch': 2} {'type': 'loss', 'content': 0.20820385217666626, 'timestamp': '2025-10-01 04:25:00.381321', 'step': 9851, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:00.415407', 'step': 9851, 'epoch': 2} {'type': 'loss', 'content': 0.15398046374320984, 'timestamp': '2025-10-01 04:25:00.439657', 'step': 9852, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:00.482589', 'step': 9852, 'epoch': 2} {'type': 'loss', 'content': 0.18539246916770935, 'timestamp': '2025-10-01 04:25:00.484741', 'step': 9853, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:00.522532', 'step': 9853, 'epoch': 2} {'type': 'loss', 'content': 0.08595842868089676, 'timestamp': '2025-10-01 04:25:00.524847', 'step': 9854, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:00.559496', 'step': 9854, 'epoch': 2} {'type': 'loss', 'content': 0.17470569908618927, 'timestamp': '2025-10-01 04:25:00.561667', 'step': 9855, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:00.604010', 'step': 9855, 'epoch': 2} {'type': 'loss', 'content': 0.13058651983737946, 'timestamp': '2025-10-01 04:25:00.628712', 'step': 9856, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:00.663751', 'step': 9856, 'epoch': 2} {'type': 'loss', 'content': 0.074432373046875, 'timestamp': '2025-10-01 04:25:00.666023', 'step': 9857, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:25:00.700513', 'step': 9857, 'epoch': 2} {'type': 'loss', 'content': 0.08109579235315323, 'timestamp': '2025-10-01 04:25:00.702672', 'step': 9858, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:00.737790', 'step': 9858, 'epoch': 2} {'type': 'loss', 'content': 0.17459924519062042, 'timestamp': '2025-10-01 04:25:00.739928', 'step': 9859, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:25:00.782667', 'step': 9859, 'epoch': 2} {'type': 'loss', 'content': 0.08386179059743881, 'timestamp': '2025-10-01 04:25:00.806808', 'step': 9860, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:00.858087', 'step': 9860, 'epoch': 2} {'type': 'loss', 'content': 0.1269247978925705, 'timestamp': '2025-10-01 04:25:00.860403', 'step': 9861, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:00.896180', 'step': 9861, 'epoch': 2} {'type': 'loss', 'content': 0.10127295553684235, 'timestamp': '2025-10-01 04:25:00.898327', 'step': 9862, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:00.933225', 'step': 9862, 'epoch': 2} {'type': 'loss', 'content': 0.1263977587223053, 'timestamp': '2025-10-01 04:25:00.936272', 'step': 9863, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:00.978403', 'step': 9863, 'epoch': 2} {'type': 'loss', 'content': 0.09212726354598999, 'timestamp': '2025-10-01 04:25:01.002167', 'step': 9864, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:01.045171', 'step': 9864, 'epoch': 2} {'type': 'loss', 'content': 0.060997799038887024, 'timestamp': '2025-10-01 04:25:01.047371', 'step': 9865, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:01.080588', 'step': 9865, 'epoch': 2} {'type': 'loss', 'content': 0.08448943495750427, 'timestamp': '2025-10-01 04:25:01.082745', 'step': 9866, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:01.122621', 'step': 9866, 'epoch': 2} {'type': 'loss', 'content': 0.0822182446718216, 'timestamp': '2025-10-01 04:25:01.124752', 'step': 9867, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:25:01.158151', 'step': 9867, 'epoch': 2} {'type': 'loss', 'content': 0.059793420135974884, 'timestamp': '2025-10-01 04:25:01.182540', 'step': 9868, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:25:01.216125', 'step': 9868, 'epoch': 2} {'type': 'loss', 'content': 0.18414916098117828, 'timestamp': '2025-10-01 04:25:01.218431', 'step': 9869, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:25:01.250424', 'step': 9869, 'epoch': 2} {'type': 'loss', 'content': 0.12646624445915222, 'timestamp': '2025-10-01 04:25:01.253395', 'step': 9870, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:01.288101', 'step': 9870, 'epoch': 2} {'type': 'loss', 'content': 0.24637728929519653, 'timestamp': '2025-10-01 04:25:01.290259', 'step': 9871, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:25:01.323645', 'step': 9871, 'epoch': 2} {'type': 'loss', 'content': 0.17504173517227173, 'timestamp': '2025-10-01 04:25:01.347275', 'step': 9872, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:01.380309', 'step': 9872, 'epoch': 2} {'type': 'loss', 'content': 0.10039083659648895, 'timestamp': '2025-10-01 04:25:01.382412', 'step': 9873, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:25:01.422740', 'step': 9873, 'epoch': 2} {'type': 'loss', 'content': 0.12043674290180206, 'timestamp': '2025-10-01 04:25:01.424825', 'step': 9874, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:01.457921', 'step': 9874, 'epoch': 2} {'type': 'loss', 'content': 0.16235347092151642, 'timestamp': '2025-10-01 04:25:01.460184', 'step': 9875, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:25:01.492397', 'step': 9875, 'epoch': 2} {'type': 'loss', 'content': 0.20221832394599915, 'timestamp': '2025-10-01 04:25:01.516258', 'step': 9876, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:25:01.548683', 'step': 9876, 'epoch': 2} {'type': 'loss', 'content': 0.1438278704881668, 'timestamp': '2025-10-01 04:25:01.550739', 'step': 9877, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:01.582511', 'step': 9877, 'epoch': 2} {'type': 'loss', 'content': 0.1928219199180603, 'timestamp': '2025-10-01 04:25:01.584526', 'step': 9878, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:01.617616', 'step': 9878, 'epoch': 2} {'type': 'loss', 'content': 0.1051003709435463, 'timestamp': '2025-10-01 04:25:01.619618', 'step': 9879, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:01.651338', 'step': 9879, 'epoch': 2} {'type': 'loss', 'content': 0.08930333703756332, 'timestamp': '2025-10-01 04:25:01.674954', 'step': 9880, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:25:01.709708', 'step': 9880, 'epoch': 2} {'type': 'loss', 'content': 0.14450721442699432, 'timestamp': '2025-10-01 04:25:01.711871', 'step': 9881, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:01.746071', 'step': 9881, 'epoch': 2} {'type': 'loss', 'content': 0.09839632362127304, 'timestamp': '2025-10-01 04:25:01.748005', 'step': 9882, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:25:01.781140', 'step': 9882, 'epoch': 2} {'type': 'loss', 'content': 0.15315325558185577, 'timestamp': '2025-10-01 04:25:01.783599', 'step': 9883, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:01.822988', 'step': 9883, 'epoch': 2} {'type': 'loss', 'content': 0.0778072252869606, 'timestamp': '2025-10-01 04:25:01.846732', 'step': 9884, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:01.884691', 'step': 9884, 'epoch': 2} {'type': 'loss', 'content': 0.1542384773492813, 'timestamp': '2025-10-01 04:25:01.899508', 'step': 9885, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:01.949532', 'step': 9885, 'epoch': 2} {'type': 'loss', 'content': 0.1418961137533188, 'timestamp': '2025-10-01 04:25:01.953003', 'step': 9886, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:01.997045', 'step': 9886, 'epoch': 2} {'type': 'loss', 'content': 0.07517867535352707, 'timestamp': '2025-10-01 04:25:01.999865', 'step': 9887, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:25:02.041889', 'step': 9887, 'epoch': 2} {'type': 'loss', 'content': 0.1686703860759735, 'timestamp': '2025-10-01 04:25:02.065828', 'step': 9888, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:02.107821', 'step': 9888, 'epoch': 2} {'type': 'loss', 'content': 0.10074079781770706, 'timestamp': '2025-10-01 04:25:02.120946', 'step': 9889, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:25:02.154908', 'step': 9889, 'epoch': 2} {'type': 'loss', 'content': 0.0739796832203865, 'timestamp': '2025-10-01 04:25:02.156881', 'step': 9890, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:02.195630', 'step': 9890, 'epoch': 2} {'type': 'loss', 'content': 0.16084851324558258, 'timestamp': '2025-10-01 04:25:02.198747', 'step': 9891, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:02.237959', 'step': 9891, 'epoch': 2} {'type': 'loss', 'content': 0.10157246142625809, 'timestamp': '2025-10-01 04:25:02.261494', 'step': 9892, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:02.308303', 'step': 9892, 'epoch': 2} {'type': 'loss', 'content': 0.11936486512422562, 'timestamp': '2025-10-01 04:25:02.310436', 'step': 9893, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:25:02.342215', 'step': 9893, 'epoch': 2} {'type': 'loss', 'content': 0.10200704634189606, 'timestamp': '2025-10-01 04:25:02.344547', 'step': 9894, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:02.378645', 'step': 9894, 'epoch': 2} {'type': 'loss', 'content': 0.2152382880449295, 'timestamp': '2025-10-01 04:25:02.380749', 'step': 9895, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:02.426380', 'step': 9895, 'epoch': 2} {'type': 'loss', 'content': 0.0863926038146019, 'timestamp': '2025-10-01 04:25:02.449957', 'step': 9896, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:02.483129', 'step': 9896, 'epoch': 2} {'type': 'loss', 'content': 0.07696056365966797, 'timestamp': '2025-10-01 04:25:02.485078', 'step': 9897, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:02.517235', 'step': 9897, 'epoch': 2} {'type': 'loss', 'content': 0.14965854585170746, 'timestamp': '2025-10-01 04:25:02.519305', 'step': 9898, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:02.550655', 'step': 9898, 'epoch': 2} {'type': 'loss', 'content': 0.12275663763284683, 'timestamp': '2025-10-01 04:25:02.552785', 'step': 9899, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:02.585695', 'step': 9899, 'epoch': 2} {'type': 'loss', 'content': 0.15148119628429413, 'timestamp': '2025-10-01 04:25:02.610212', 'step': 9900, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:02.642194', 'step': 9900, 'epoch': 2} {'type': 'loss', 'content': 0.15634368360042572, 'timestamp': '2025-10-01 04:25:02.645220', 'step': 9901, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:02.679469', 'step': 9901, 'epoch': 2} {'type': 'loss', 'content': 0.14733953773975372, 'timestamp': '2025-10-01 04:25:02.693335', 'step': 9902, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:25:02.727120', 'step': 9902, 'epoch': 2} {'type': 'loss', 'content': 0.11669769138097763, 'timestamp': '2025-10-01 04:25:02.729253', 'step': 9903, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:02.762883', 'step': 9903, 'epoch': 2} {'type': 'loss', 'content': 0.14639341831207275, 'timestamp': '2025-10-01 04:25:02.787188', 'step': 9904, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:25:02.820408', 'step': 9904, 'epoch': 2} {'type': 'loss', 'content': 0.021058978512883186, 'timestamp': '2025-10-01 04:25:02.822383', 'step': 9905, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:02.863421', 'step': 9905, 'epoch': 2} {'type': 'loss', 'content': 0.13769124448299408, 'timestamp': '2025-10-01 04:25:02.869627', 'step': 9906, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:25:02.905195', 'step': 9906, 'epoch': 2} {'type': 'loss', 'content': 0.05299462005496025, 'timestamp': '2025-10-01 04:25:02.907265', 'step': 9907, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:02.943925', 'step': 9907, 'epoch': 2} {'type': 'loss', 'content': 0.12958022952079773, 'timestamp': '2025-10-01 04:25:02.967429', 'step': 9908, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:03.001806', 'step': 9908, 'epoch': 2} {'type': 'loss', 'content': 0.09402541816234589, 'timestamp': '2025-10-01 04:25:03.003759', 'step': 9909, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:25:03.036899', 'step': 9909, 'epoch': 2} {'type': 'loss', 'content': 0.11062571406364441, 'timestamp': '2025-10-01 04:25:03.049678', 'step': 9910, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:03.092992', 'step': 9910, 'epoch': 2} {'type': 'loss', 'content': 0.06725508719682693, 'timestamp': '2025-10-01 04:25:03.095414', 'step': 9911, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:03.131410', 'step': 9911, 'epoch': 2} {'type': 'loss', 'content': 0.13334499299526215, 'timestamp': '2025-10-01 04:25:03.155260', 'step': 9912, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:03.188819', 'step': 9912, 'epoch': 2} {'type': 'loss', 'content': 0.15403436124324799, 'timestamp': '2025-10-01 04:25:03.191053', 'step': 9913, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:03.225105', 'step': 9913, 'epoch': 2} {'type': 'loss', 'content': 0.1205437034368515, 'timestamp': '2025-10-01 04:25:03.227436', 'step': 9914, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:03.261190', 'step': 9914, 'epoch': 2} {'type': 'loss', 'content': 0.11216827481985092, 'timestamp': '2025-10-01 04:25:03.263653', 'step': 9915, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:25:03.295115', 'step': 9915, 'epoch': 2} {'type': 'loss', 'content': 0.11172986030578613, 'timestamp': '2025-10-01 04:25:03.318889', 'step': 9916, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:03.376391', 'step': 9916, 'epoch': 2} {'type': 'loss', 'content': 0.08756784349679947, 'timestamp': '2025-10-01 04:25:03.379925', 'step': 9917, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:03.416506', 'step': 9917, 'epoch': 2} {'type': 'loss', 'content': 0.08283264189958572, 'timestamp': '2025-10-01 04:25:03.419447', 'step': 9918, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:03.459984', 'step': 9918, 'epoch': 2} {'type': 'loss', 'content': 0.11088167876005173, 'timestamp': '2025-10-01 04:25:03.462163', 'step': 9919, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:03.496804', 'step': 9919, 'epoch': 2} {'type': 'loss', 'content': 0.08644158393144608, 'timestamp': '2025-10-01 04:25:03.522699', 'step': 9920, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:03.555167', 'step': 9920, 'epoch': 2} {'type': 'loss', 'content': 0.12111245840787888, 'timestamp': '2025-10-01 04:25:03.557866', 'step': 9921, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:03.594742', 'step': 9921, 'epoch': 2} {'type': 'loss', 'content': 0.1042364165186882, 'timestamp': '2025-10-01 04:25:03.596835', 'step': 9922, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:03.633178', 'step': 9922, 'epoch': 2} {'type': 'loss', 'content': 0.16803763806819916, 'timestamp': '2025-10-01 04:25:03.635159', 'step': 9923, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:03.667560', 'step': 9923, 'epoch': 2} {'type': 'loss', 'content': 0.08198963105678558, 'timestamp': '2025-10-01 04:25:03.691655', 'step': 9924, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:25:03.726037', 'step': 9924, 'epoch': 2} {'type': 'loss', 'content': 0.1358885020017624, 'timestamp': '2025-10-01 04:25:03.728149', 'step': 9925, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:03.761079', 'step': 9925, 'epoch': 2} {'type': 'loss', 'content': 0.08044083416461945, 'timestamp': '2025-10-01 04:25:03.763164', 'step': 9926, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:03.805974', 'step': 9926, 'epoch': 2} {'type': 'loss', 'content': 0.05774172767996788, 'timestamp': '2025-10-01 04:25:03.810076', 'step': 9927, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:25:03.855571', 'step': 9927, 'epoch': 2} {'type': 'loss', 'content': 0.15302962064743042, 'timestamp': '2025-10-01 04:25:03.879327', 'step': 9928, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:25:03.915850', 'step': 9928, 'epoch': 2} {'type': 'loss', 'content': 0.07732082903385162, 'timestamp': '2025-10-01 04:25:03.918390', 'step': 9929, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:03.951689', 'step': 9929, 'epoch': 2} {'type': 'loss', 'content': 0.2974712550640106, 'timestamp': '2025-10-01 04:25:03.953930', 'step': 9930, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:03.986357', 'step': 9930, 'epoch': 2} {'type': 'loss', 'content': 0.08889271318912506, 'timestamp': '2025-10-01 04:25:03.990371', 'step': 9931, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:04.028530', 'step': 9931, 'epoch': 2} {'type': 'loss', 'content': 0.11426234245300293, 'timestamp': '2025-10-01 04:25:04.052798', 'step': 9932, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:04.086281', 'step': 9932, 'epoch': 2} {'type': 'loss', 'content': 0.09801642596721649, 'timestamp': '2025-10-01 04:25:04.090003', 'step': 9933, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:04.126153', 'step': 9933, 'epoch': 2} {'type': 'loss', 'content': 0.1601719856262207, 'timestamp': '2025-10-01 04:25:04.128719', 'step': 9934, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:25:04.161448', 'step': 9934, 'epoch': 2} {'type': 'loss', 'content': 0.05726341903209686, 'timestamp': '2025-10-01 04:25:04.163639', 'step': 9935, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:25:04.197846', 'step': 9935, 'epoch': 2} {'type': 'loss', 'content': 0.10767419636249542, 'timestamp': '2025-10-01 04:25:04.221395', 'step': 9936, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:25:04.269675', 'step': 9936, 'epoch': 2} {'type': 'loss', 'content': 0.15287116169929504, 'timestamp': '2025-10-01 04:25:04.272220', 'step': 9937, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:25:04.310714', 'step': 9937, 'epoch': 2} {'type': 'loss', 'content': 0.03207588568329811, 'timestamp': '2025-10-01 04:25:04.313001', 'step': 9938, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:04.366584', 'step': 9938, 'epoch': 2} {'type': 'loss', 'content': 0.09612864255905151, 'timestamp': '2025-10-01 04:25:04.369530', 'step': 9939, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:04.403093', 'step': 9939, 'epoch': 2} {'type': 'loss', 'content': 0.07435319572687149, 'timestamp': '2025-10-01 04:25:04.440296', 'step': 9940, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:25:04.492900', 'step': 9940, 'epoch': 2} {'type': 'loss', 'content': 0.17383944988250732, 'timestamp': '2025-10-01 04:25:04.495628', 'step': 9941, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:25:04.531141', 'step': 9941, 'epoch': 2} {'type': 'loss', 'content': 0.055971600115299225, 'timestamp': '2025-10-01 04:25:04.533281', 'step': 9942, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:04.585441', 'step': 9942, 'epoch': 2} {'type': 'loss', 'content': 0.15510712563991547, 'timestamp': '2025-10-01 04:25:04.588108', 'step': 9943, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:04.622230', 'step': 9943, 'epoch': 2} {'type': 'loss', 'content': 0.11172603815793991, 'timestamp': '2025-10-01 04:25:04.645636', 'step': 9944, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:25:04.679219', 'step': 9944, 'epoch': 2} {'type': 'loss', 'content': 0.1772400289773941, 'timestamp': '2025-10-01 04:25:04.681542', 'step': 9945, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:04.719357', 'step': 9945, 'epoch': 2} {'type': 'loss', 'content': 0.12866486608982086, 'timestamp': '2025-10-01 04:25:04.721795', 'step': 9946, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:04.759631', 'step': 9946, 'epoch': 2} {'type': 'loss', 'content': 0.09013998508453369, 'timestamp': '2025-10-01 04:25:04.761890', 'step': 9947, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:25:04.815934', 'step': 9947, 'epoch': 2} {'type': 'loss', 'content': 0.11701752990484238, 'timestamp': '2025-10-01 04:25:04.840237', 'step': 9948, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:04.876241', 'step': 9948, 'epoch': 2} {'type': 'loss', 'content': 0.137936070561409, 'timestamp': '2025-10-01 04:25:04.878827', 'step': 9949, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:25:04.912089', 'step': 9949, 'epoch': 2} {'type': 'loss', 'content': 0.08523348718881607, 'timestamp': '2025-10-01 04:25:04.914590', 'step': 9950, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:25:04.948546', 'step': 9950, 'epoch': 2} {'type': 'loss', 'content': 0.11917272955179214, 'timestamp': '2025-10-01 04:25:04.950996', 'step': 9951, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:04.993720', 'step': 9951, 'epoch': 2} {'type': 'loss', 'content': 0.11742589622735977, 'timestamp': '2025-10-01 04:25:05.017644', 'step': 9952, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:25:05.052231', 'step': 9952, 'epoch': 2} {'type': 'loss', 'content': 0.04527580738067627, 'timestamp': '2025-10-01 04:25:05.054617', 'step': 9953, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:05.087856', 'step': 9953, 'epoch': 2} {'type': 'loss', 'content': 0.23009146749973297, 'timestamp': '2025-10-01 04:25:05.091207', 'step': 9954, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:05.124910', 'step': 9954, 'epoch': 2} {'type': 'loss', 'content': 0.2028420865535736, 'timestamp': '2025-10-01 04:25:05.127122', 'step': 9955, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:25:05.163439', 'step': 9955, 'epoch': 2} {'type': 'loss', 'content': 0.08693134039640427, 'timestamp': '2025-10-01 04:25:05.187377', 'step': 9956, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:05.221241', 'step': 9956, 'epoch': 2} {'type': 'loss', 'content': 0.0940452292561531, 'timestamp': '2025-10-01 04:25:05.223435', 'step': 9957, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:25:05.257753', 'step': 9957, 'epoch': 2} {'type': 'loss', 'content': 0.13420014083385468, 'timestamp': '2025-10-01 04:25:05.260572', 'step': 9958, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:05.293496', 'step': 9958, 'epoch': 2} {'type': 'loss', 'content': 0.06328193098306656, 'timestamp': '2025-10-01 04:25:05.295917', 'step': 9959, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:25:05.331269', 'step': 9959, 'epoch': 2} {'type': 'loss', 'content': 0.1319504827260971, 'timestamp': '2025-10-01 04:25:05.354935', 'step': 9960, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:05.390911', 'step': 9960, 'epoch': 2} {'type': 'loss', 'content': 0.14427900314331055, 'timestamp': '2025-10-01 04:25:05.393609', 'step': 9961, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:05.427461', 'step': 9961, 'epoch': 2} {'type': 'loss', 'content': 0.13435836136341095, 'timestamp': '2025-10-01 04:25:05.429920', 'step': 9962, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:05.465133', 'step': 9962, 'epoch': 2} {'type': 'loss', 'content': 0.08478185534477234, 'timestamp': '2025-10-01 04:25:05.467535', 'step': 9963, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:25:05.500551', 'step': 9963, 'epoch': 2} {'type': 'loss', 'content': 0.06870406121015549, 'timestamp': '2025-10-01 04:25:05.524248', 'step': 9964, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:25:05.558375', 'step': 9964, 'epoch': 2} {'type': 'loss', 'content': 0.16354672610759735, 'timestamp': '2025-10-01 04:25:05.560560', 'step': 9965, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:05.598448', 'step': 9965, 'epoch': 2} {'type': 'loss', 'content': 0.05953967943787575, 'timestamp': '2025-10-01 04:25:05.600456', 'step': 9966, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:05.651626', 'step': 9966, 'epoch': 2} {'type': 'loss', 'content': 0.0681849792599678, 'timestamp': '2025-10-01 04:25:05.653740', 'step': 9967, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:05.686813', 'step': 9967, 'epoch': 2} {'type': 'loss', 'content': 0.2516496181488037, 'timestamp': '2025-10-01 04:25:05.710469', 'step': 9968, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:25:05.744838', 'step': 9968, 'epoch': 2} {'type': 'loss', 'content': 0.04204569756984711, 'timestamp': '2025-10-01 04:25:05.746757', 'step': 9969, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:05.779702', 'step': 9969, 'epoch': 2} {'type': 'loss', 'content': 0.0916241705417633, 'timestamp': '2025-10-01 04:25:05.792698', 'step': 9970, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:05.825508', 'step': 9970, 'epoch': 2} {'type': 'loss', 'content': 0.14004723727703094, 'timestamp': '2025-10-01 04:25:05.827601', 'step': 9971, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:25:05.864411', 'step': 9971, 'epoch': 2} {'type': 'loss', 'content': 0.06804389506578445, 'timestamp': '2025-10-01 04:25:05.887961', 'step': 9972, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:05.923867', 'step': 9972, 'epoch': 2} {'type': 'loss', 'content': 0.19351506233215332, 'timestamp': '2025-10-01 04:25:05.925907', 'step': 9973, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:05.959154', 'step': 9973, 'epoch': 2} {'type': 'loss', 'content': 0.1286623328924179, 'timestamp': '2025-10-01 04:25:05.962055', 'step': 9974, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:05.997087', 'step': 9974, 'epoch': 2} {'type': 'loss', 'content': 0.12030531466007233, 'timestamp': '2025-10-01 04:25:05.999193', 'step': 9975, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:06.033387', 'step': 9975, 'epoch': 2} {'type': 'loss', 'content': 0.13625593483448029, 'timestamp': '2025-10-01 04:25:06.057067', 'step': 9976, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:06.094023', 'step': 9976, 'epoch': 2} {'type': 'loss', 'content': 0.11317095160484314, 'timestamp': '2025-10-01 04:25:06.096172', 'step': 9977, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:06.143478', 'step': 9977, 'epoch': 2} {'type': 'loss', 'content': 0.07042735069990158, 'timestamp': '2025-10-01 04:25:06.145741', 'step': 9978, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:25:06.180696', 'step': 9978, 'epoch': 2} {'type': 'loss', 'content': 0.1286361664533615, 'timestamp': '2025-10-01 04:25:06.182884', 'step': 9979, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:06.229375', 'step': 9979, 'epoch': 2} {'type': 'loss', 'content': 0.08487184345722198, 'timestamp': '2025-10-01 04:25:06.253057', 'step': 9980, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:25:06.287358', 'step': 9980, 'epoch': 2} {'type': 'loss', 'content': 0.16264446079730988, 'timestamp': '2025-10-01 04:25:06.289416', 'step': 9981, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:06.322878', 'step': 9981, 'epoch': 2} {'type': 'loss', 'content': 0.06267915666103363, 'timestamp': '2025-10-01 04:25:06.325024', 'step': 9982, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:25:06.358142', 'step': 9982, 'epoch': 2} {'type': 'loss', 'content': 0.06860605627298355, 'timestamp': '2025-10-01 04:25:06.360117', 'step': 9983, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:06.393991', 'step': 9983, 'epoch': 2} {'type': 'loss', 'content': 0.0701623409986496, 'timestamp': '2025-10-01 04:25:06.417469', 'step': 9984, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:06.452262', 'step': 9984, 'epoch': 2} {'type': 'loss', 'content': 0.11572053283452988, 'timestamp': '2025-10-01 04:25:06.454276', 'step': 9985, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:06.487401', 'step': 9985, 'epoch': 2} {'type': 'loss', 'content': 0.08750739693641663, 'timestamp': '2025-10-01 04:25:06.489464', 'step': 9986, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:06.522537', 'step': 9986, 'epoch': 2} {'type': 'loss', 'content': 0.1343734860420227, 'timestamp': '2025-10-01 04:25:06.524656', 'step': 9987, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:25:06.556474', 'step': 9987, 'epoch': 2} {'type': 'loss', 'content': 0.13942623138427734, 'timestamp': '2025-10-01 04:25:06.580239', 'step': 9988, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:25:06.614922', 'step': 9988, 'epoch': 2} {'type': 'loss', 'content': 0.19031184911727905, 'timestamp': '2025-10-01 04:25:06.617079', 'step': 9989, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:06.648902', 'step': 9989, 'epoch': 2} {'type': 'loss', 'content': 0.14731435477733612, 'timestamp': '2025-10-01 04:25:06.650970', 'step': 9990, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:25:06.683756', 'step': 9990, 'epoch': 2} {'type': 'loss', 'content': 0.15089116990566254, 'timestamp': '2025-10-01 04:25:06.686290', 'step': 9991, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:25:06.721584', 'step': 9991, 'epoch': 2} {'type': 'loss', 'content': 0.1306794285774231, 'timestamp': '2025-10-01 04:25:06.745632', 'step': 9992, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:25:06.779443', 'step': 9992, 'epoch': 2} {'type': 'loss', 'content': 0.14131225645542145, 'timestamp': '2025-10-01 04:25:06.793458', 'step': 9993, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:25:06.825947', 'step': 9993, 'epoch': 2} {'type': 'loss', 'content': 0.0933103933930397, 'timestamp': '2025-10-01 04:25:06.828093', 'step': 9994, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:06.874685', 'step': 9994, 'epoch': 2} {'type': 'loss', 'content': 0.14589045941829681, 'timestamp': '2025-10-01 04:25:06.888410', 'step': 9995, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:06.923126', 'step': 9995, 'epoch': 2} {'type': 'loss', 'content': 0.1717357188463211, 'timestamp': '2025-10-01 04:25:06.946908', 'step': 9996, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:06.978939', 'step': 9996, 'epoch': 2} {'type': 'loss', 'content': 0.04865608736872673, 'timestamp': '2025-10-01 04:25:06.981079', 'step': 9997, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:25:07.014398', 'step': 9997, 'epoch': 2} {'type': 'loss', 'content': 0.060377337038517, 'timestamp': '2025-10-01 04:25:07.016547', 'step': 9998, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:07.049231', 'step': 9998, 'epoch': 2} {'type': 'loss', 'content': 0.10030995309352875, 'timestamp': '2025-10-01 04:25:07.055001', 'step': 9999, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:25:07.096730', 'step': 9999, 'epoch': 2} {'type': 'loss', 'content': 0.10772503167390823, 'timestamp': '2025-10-01 04:25:07.120369', 'step': 10000, 'epoch': 2} {'type': 'info', 'content': 'Checkpoint saved at step 10000', 'timestamp': '2025-10-01 04:25:12.526075', 'step': 10000, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:25:12.561060', 'step': 10000, 'epoch': 2} {'type': 'loss', 'content': 0.14252541959285736, 'timestamp': '2025-10-01 04:25:12.563316', 'step': 10001, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:25:12.597155', 'step': 10001, 'epoch': 2} {'type': 'loss', 'content': 0.09118280559778214, 'timestamp': '2025-10-01 04:25:12.599328', 'step': 10002, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:12.634864', 'step': 10002, 'epoch': 2} {'type': 'loss', 'content': 0.09161382168531418, 'timestamp': '2025-10-01 04:25:12.645790', 'step': 10003, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:12.677832', 'step': 10003, 'epoch': 2} {'type': 'loss', 'content': 0.13356639444828033, 'timestamp': '2025-10-01 04:25:12.701572', 'step': 10004, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:12.740602', 'step': 10004, 'epoch': 2} {'type': 'loss', 'content': 0.10597334057092667, 'timestamp': '2025-10-01 04:25:12.743144', 'step': 10005, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:12.778828', 'step': 10005, 'epoch': 2} {'type': 'loss', 'content': 0.12246080487966537, 'timestamp': '2025-10-01 04:25:12.781274', 'step': 10006, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:25:12.821614', 'step': 10006, 'epoch': 2} {'type': 'loss', 'content': 0.137669175863266, 'timestamp': '2025-10-01 04:25:12.824110', 'step': 10007, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:12.862450', 'step': 10007, 'epoch': 2} {'type': 'loss', 'content': 0.22835788130760193, 'timestamp': '2025-10-01 04:25:12.894958', 'step': 10008, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:12.930909', 'step': 10008, 'epoch': 2} {'type': 'loss', 'content': 0.11011964827775955, 'timestamp': '2025-10-01 04:25:12.942877', 'step': 10009, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:25:12.977825', 'step': 10009, 'epoch': 2} {'type': 'loss', 'content': 0.07661513239145279, 'timestamp': '2025-10-01 04:25:12.980385', 'step': 10010, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:13.023472', 'step': 10010, 'epoch': 2} {'type': 'loss', 'content': 0.14652447402477264, 'timestamp': '2025-10-01 04:25:13.026137', 'step': 10011, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:13.070468', 'step': 10011, 'epoch': 2} {'type': 'loss', 'content': 0.06339696049690247, 'timestamp': '2025-10-01 04:25:13.094718', 'step': 10012, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:25:13.131924', 'step': 10012, 'epoch': 2} {'type': 'loss', 'content': 0.13656863570213318, 'timestamp': '2025-10-01 04:25:13.134472', 'step': 10013, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:25:13.170622', 'step': 10013, 'epoch': 2} {'type': 'loss', 'content': 0.12215619534254074, 'timestamp': '2025-10-01 04:25:13.173410', 'step': 10014, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:13.209125', 'step': 10014, 'epoch': 2} {'type': 'loss', 'content': 0.11069920659065247, 'timestamp': '2025-10-01 04:25:13.211646', 'step': 10015, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:13.248118', 'step': 10015, 'epoch': 2} {'type': 'loss', 'content': 0.06492460519075394, 'timestamp': '2025-10-01 04:25:13.272215', 'step': 10016, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:25:13.311069', 'step': 10016, 'epoch': 2} {'type': 'loss', 'content': 0.056715164333581924, 'timestamp': '2025-10-01 04:25:13.313157', 'step': 10017, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:25:13.356788', 'step': 10017, 'epoch': 2} {'type': 'loss', 'content': 0.09005869179964066, 'timestamp': '2025-10-01 04:25:13.359264', 'step': 10018, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:13.394271', 'step': 10018, 'epoch': 2} {'type': 'loss', 'content': 0.047353923320770264, 'timestamp': '2025-10-01 04:25:13.396878', 'step': 10019, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:13.431337', 'step': 10019, 'epoch': 2} {'type': 'loss', 'content': 0.15101878345012665, 'timestamp': '2025-10-01 04:25:13.455476', 'step': 10020, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:25:13.490150', 'step': 10020, 'epoch': 2} {'type': 'loss', 'content': 0.06946545094251633, 'timestamp': '2025-10-01 04:25:13.492782', 'step': 10021, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:25:13.527697', 'step': 10021, 'epoch': 2} {'type': 'loss', 'content': 0.02890121378004551, 'timestamp': '2025-10-01 04:25:13.530149', 'step': 10022, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:13.564743', 'step': 10022, 'epoch': 2} {'type': 'loss', 'content': 0.08782434463500977, 'timestamp': '2025-10-01 04:25:13.567282', 'step': 10023, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:25:13.603826', 'step': 10023, 'epoch': 2} {'type': 'loss', 'content': 0.037031177431344986, 'timestamp': '2025-10-01 04:25:13.627876', 'step': 10024, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:25:13.662342', 'step': 10024, 'epoch': 2} {'type': 'loss', 'content': 0.10811537504196167, 'timestamp': '2025-10-01 04:25:13.664675', 'step': 10025, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:25:13.697932', 'step': 10025, 'epoch': 2} {'type': 'loss', 'content': 0.09208232909440994, 'timestamp': '2025-10-01 04:25:13.700530', 'step': 10026, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:13.737909', 'step': 10026, 'epoch': 2} {'type': 'loss', 'content': 0.10506381094455719, 'timestamp': '2025-10-01 04:25:13.740338', 'step': 10027, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:25:13.774590', 'step': 10027, 'epoch': 2} {'type': 'loss', 'content': 0.11729294806718826, 'timestamp': '2025-10-01 04:25:13.808254', 'step': 10028, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:25:13.841331', 'step': 10028, 'epoch': 2} {'type': 'loss', 'content': 0.07433045655488968, 'timestamp': '2025-10-01 04:25:13.843593', 'step': 10029, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:25:13.876944', 'step': 10029, 'epoch': 2} {'type': 'loss', 'content': 0.10294529795646667, 'timestamp': '2025-10-01 04:25:13.879237', 'step': 10030, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:25:13.913989', 'step': 10030, 'epoch': 2} {'type': 'loss', 'content': 0.18440841138362885, 'timestamp': '2025-10-01 04:25:13.916590', 'step': 10031, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:13.960803', 'step': 10031, 'epoch': 2} {'type': 'loss', 'content': 0.07565736025571823, 'timestamp': '2025-10-01 04:25:13.984446', 'step': 10032, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:14.018059', 'step': 10032, 'epoch': 2} {'type': 'loss', 'content': 0.08941753208637238, 'timestamp': '2025-10-01 04:25:14.026715', 'step': 10033, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:14.071579', 'step': 10033, 'epoch': 2} {'type': 'loss', 'content': 0.051264118403196335, 'timestamp': '2025-10-01 04:25:14.074877', 'step': 10034, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:25:14.109433', 'step': 10034, 'epoch': 2} {'type': 'loss', 'content': 0.09773364663124084, 'timestamp': '2025-10-01 04:25:14.113672', 'step': 10035, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:14.146828', 'step': 10035, 'epoch': 2} {'type': 'loss', 'content': 0.04528047516942024, 'timestamp': '2025-10-01 04:25:14.170396', 'step': 10036, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:14.227861', 'step': 10036, 'epoch': 2} {'type': 'loss', 'content': 0.05568820238113403, 'timestamp': '2025-10-01 04:25:14.230315', 'step': 10037, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:25:14.273782', 'step': 10037, 'epoch': 2} {'type': 'loss', 'content': 0.17034496366977692, 'timestamp': '2025-10-01 04:25:14.276491', 'step': 10038, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:14.317433', 'step': 10038, 'epoch': 2} {'type': 'loss', 'content': 0.1481413096189499, 'timestamp': '2025-10-01 04:25:14.319673', 'step': 10039, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:25:14.355236', 'step': 10039, 'epoch': 2} {'type': 'loss', 'content': 0.06363614648580551, 'timestamp': '2025-10-01 04:25:14.379078', 'step': 10040, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:25:14.413533', 'step': 10040, 'epoch': 2} {'type': 'loss', 'content': 0.09887174516916275, 'timestamp': '2025-10-01 04:25:14.415968', 'step': 10041, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:25:14.462815', 'step': 10041, 'epoch': 2} {'type': 'loss', 'content': 0.15700960159301758, 'timestamp': '2025-10-01 04:25:14.465099', 'step': 10042, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:25:14.509544', 'step': 10042, 'epoch': 2} {'type': 'loss', 'content': 0.13818088173866272, 'timestamp': '2025-10-01 04:25:14.511875', 'step': 10043, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:14.545652', 'step': 10043, 'epoch': 2} {'type': 'loss', 'content': 0.11921117454767227, 'timestamp': '2025-10-01 04:25:14.569106', 'step': 10044, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:25:14.606216', 'step': 10044, 'epoch': 2} {'type': 'loss', 'content': 0.13287338614463806, 'timestamp': '2025-10-01 04:25:14.608301', 'step': 10045, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:14.655182', 'step': 10045, 'epoch': 2} {'type': 'loss', 'content': 0.08768587559461594, 'timestamp': '2025-10-01 04:25:14.657200', 'step': 10046, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:14.693228', 'step': 10046, 'epoch': 2} {'type': 'loss', 'content': 0.11130377650260925, 'timestamp': '2025-10-01 04:25:14.696004', 'step': 10047, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:14.746021', 'step': 10047, 'epoch': 2} {'type': 'loss', 'content': 0.12424146384000778, 'timestamp': '2025-10-01 04:25:14.769812', 'step': 10048, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:14.812709', 'step': 10048, 'epoch': 2} {'type': 'loss', 'content': 0.0890621617436409, 'timestamp': '2025-10-01 04:25:14.814877', 'step': 10049, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:25:14.852006', 'step': 10049, 'epoch': 2} {'type': 'loss', 'content': 0.06693807244300842, 'timestamp': '2025-10-01 04:25:14.854116', 'step': 10050, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:25:14.902375', 'step': 10050, 'epoch': 2} {'type': 'loss', 'content': 0.11043979972600937, 'timestamp': '2025-10-01 04:25:14.904525', 'step': 10051, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:14.961473', 'step': 10051, 'epoch': 2} {'type': 'loss', 'content': 0.06747911125421524, 'timestamp': '2025-10-01 04:25:14.984964', 'step': 10052, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:25:15.022587', 'step': 10052, 'epoch': 2} {'type': 'loss', 'content': 0.13425254821777344, 'timestamp': '2025-10-01 04:25:15.024702', 'step': 10053, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:15.075559', 'step': 10053, 'epoch': 2} {'type': 'loss', 'content': 0.08332620561122894, 'timestamp': '2025-10-01 04:25:15.079669', 'step': 10054, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:15.116573', 'step': 10054, 'epoch': 2} {'type': 'loss', 'content': 0.0481591634452343, 'timestamp': '2025-10-01 04:25:15.119461', 'step': 10055, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:15.154368', 'step': 10055, 'epoch': 2} {'type': 'loss', 'content': 0.12745527923107147, 'timestamp': '2025-10-01 04:25:15.189417', 'step': 10056, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:15.223246', 'step': 10056, 'epoch': 2} {'type': 'loss', 'content': 0.03044944256544113, 'timestamp': '2025-10-01 04:25:15.233942', 'step': 10057, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:15.269928', 'step': 10057, 'epoch': 2} {'type': 'loss', 'content': 0.09010138362646103, 'timestamp': '2025-10-01 04:25:15.271712', 'step': 10058, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:15.330667', 'step': 10058, 'epoch': 2} {'type': 'loss', 'content': 0.0860787108540535, 'timestamp': '2025-10-01 04:25:15.332743', 'step': 10059, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:25:15.390176', 'step': 10059, 'epoch': 2} {'type': 'loss', 'content': 0.10472434759140015, 'timestamp': '2025-10-01 04:25:15.413748', 'step': 10060, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:15.448038', 'step': 10060, 'epoch': 2} {'type': 'loss', 'content': 0.07203369587659836, 'timestamp': '2025-10-01 04:25:15.450165', 'step': 10061, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:25:15.485952', 'step': 10061, 'epoch': 2} {'type': 'loss', 'content': 0.054312318563461304, 'timestamp': '2025-10-01 04:25:15.488316', 'step': 10062, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:15.522587', 'step': 10062, 'epoch': 2} {'type': 'loss', 'content': 0.0671568363904953, 'timestamp': '2025-10-01 04:25:15.524780', 'step': 10063, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:25:15.559726', 'step': 10063, 'epoch': 2} {'type': 'loss', 'content': 0.07559387385845184, 'timestamp': '2025-10-01 04:25:15.593698', 'step': 10064, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:25:15.628635', 'step': 10064, 'epoch': 2} {'type': 'loss', 'content': 0.06147383898496628, 'timestamp': '2025-10-01 04:25:15.630602', 'step': 10065, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:25:15.667228', 'step': 10065, 'epoch': 2} {'type': 'loss', 'content': 0.18252772092819214, 'timestamp': '2025-10-01 04:25:15.669592', 'step': 10066, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:15.708409', 'step': 10066, 'epoch': 2} {'type': 'loss', 'content': 0.059955988079309464, 'timestamp': '2025-10-01 04:25:15.711258', 'step': 10067, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:15.762450', 'step': 10067, 'epoch': 2} {'type': 'loss', 'content': 0.07329752296209335, 'timestamp': '2025-10-01 04:25:15.786069', 'step': 10068, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:25:15.822103', 'step': 10068, 'epoch': 2} {'type': 'loss', 'content': 0.20019282400608063, 'timestamp': '2025-10-01 04:25:15.824146', 'step': 10069, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:15.868752', 'step': 10069, 'epoch': 2} {'type': 'loss', 'content': 0.14411517977714539, 'timestamp': '2025-10-01 04:25:15.870835', 'step': 10070, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:25:15.904617', 'step': 10070, 'epoch': 2} {'type': 'loss', 'content': 0.07337357848882675, 'timestamp': '2025-10-01 04:25:15.906643', 'step': 10071, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:25:15.955438', 'step': 10071, 'epoch': 2} {'type': 'loss', 'content': 0.060870710760354996, 'timestamp': '2025-10-01 04:25:15.978922', 'step': 10072, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:25:16.028401', 'step': 10072, 'epoch': 2} {'type': 'loss', 'content': 0.08404874056577682, 'timestamp': '2025-10-01 04:25:16.030531', 'step': 10073, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:25:16.081052', 'step': 10073, 'epoch': 2} {'type': 'loss', 'content': 0.09206017106771469, 'timestamp': '2025-10-01 04:25:16.083092', 'step': 10074, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:16.121524', 'step': 10074, 'epoch': 2} {'type': 'loss', 'content': 0.11665138602256775, 'timestamp': '2025-10-01 04:25:16.123685', 'step': 10075, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:16.161963', 'step': 10075, 'epoch': 2} {'type': 'loss', 'content': 0.13306474685668945, 'timestamp': '2025-10-01 04:25:16.185605', 'step': 10076, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:25:16.234822', 'step': 10076, 'epoch': 2} {'type': 'loss', 'content': 0.1826460063457489, 'timestamp': '2025-10-01 04:25:16.245004', 'step': 10077, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:25:16.282886', 'step': 10077, 'epoch': 2} {'type': 'loss', 'content': 0.07601391524076462, 'timestamp': '2025-10-01 04:25:16.284966', 'step': 10078, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:16.319675', 'step': 10078, 'epoch': 2} {'type': 'loss', 'content': 0.17409788072109222, 'timestamp': '2025-10-01 04:25:16.322121', 'step': 10079, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:25:16.356352', 'step': 10079, 'epoch': 2} {'type': 'loss', 'content': 0.08663099259138107, 'timestamp': '2025-10-01 04:25:16.379840', 'step': 10080, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:16.412852', 'step': 10080, 'epoch': 2} {'type': 'loss', 'content': 0.06347479671239853, 'timestamp': '2025-10-01 04:25:16.414942', 'step': 10081, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:25:16.449504', 'step': 10081, 'epoch': 2} {'type': 'loss', 'content': 0.06067588925361633, 'timestamp': '2025-10-01 04:25:16.451599', 'step': 10082, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:16.489518', 'step': 10082, 'epoch': 2} {'type': 'loss', 'content': 0.11999370157718658, 'timestamp': '2025-10-01 04:25:16.491828', 'step': 10083, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:25:16.525841', 'step': 10083, 'epoch': 2} {'type': 'loss', 'content': 0.1668354719877243, 'timestamp': '2025-10-01 04:25:16.549125', 'step': 10084, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:16.583856', 'step': 10084, 'epoch': 2} {'type': 'loss', 'content': 0.1056692823767662, 'timestamp': '2025-10-01 04:25:16.585847', 'step': 10085, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:25:16.620951', 'step': 10085, 'epoch': 2} {'type': 'loss', 'content': 0.08235760778188705, 'timestamp': '2025-10-01 04:25:16.623409', 'step': 10086, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:25:16.669384', 'step': 10086, 'epoch': 2} {'type': 'loss', 'content': 0.10085278004407883, 'timestamp': '2025-10-01 04:25:16.671868', 'step': 10087, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:16.710184', 'step': 10087, 'epoch': 2} {'type': 'loss', 'content': 0.056318171322345734, 'timestamp': '2025-10-01 04:25:16.733741', 'step': 10088, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:25:16.768222', 'step': 10088, 'epoch': 2} {'type': 'loss', 'content': 0.09189054369926453, 'timestamp': '2025-10-01 04:25:16.770303', 'step': 10089, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:25:16.803523', 'step': 10089, 'epoch': 2} {'type': 'loss', 'content': 0.058424826711416245, 'timestamp': '2025-10-01 04:25:16.805340', 'step': 10090, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:16.839464', 'step': 10090, 'epoch': 2} {'type': 'loss', 'content': 0.1270972639322281, 'timestamp': '2025-10-01 04:25:16.841868', 'step': 10091, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:25:16.891533', 'step': 10091, 'epoch': 2} {'type': 'loss', 'content': 0.08900178968906403, 'timestamp': '2025-10-01 04:25:16.924924', 'step': 10092, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:16.960199', 'step': 10092, 'epoch': 2} {'type': 'loss', 'content': 0.10145213454961777, 'timestamp': '2025-10-01 04:25:16.962411', 'step': 10093, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:25:17.004644', 'step': 10093, 'epoch': 2} {'type': 'loss', 'content': 0.11169667541980743, 'timestamp': '2025-10-01 04:25:17.006614', 'step': 10094, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:17.060959', 'step': 10094, 'epoch': 2} {'type': 'loss', 'content': 0.0907084196805954, 'timestamp': '2025-10-01 04:25:17.063175', 'step': 10095, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:17.096537', 'step': 10095, 'epoch': 2} {'type': 'loss', 'content': 0.11939360201358795, 'timestamp': '2025-10-01 04:25:17.120222', 'step': 10096, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:17.167209', 'step': 10096, 'epoch': 2} {'type': 'loss', 'content': 0.07334586977958679, 'timestamp': '2025-10-01 04:25:17.169272', 'step': 10097, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:25:17.205353', 'step': 10097, 'epoch': 2} {'type': 'loss', 'content': 0.07508895546197891, 'timestamp': '2025-10-01 04:25:17.208298', 'step': 10098, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:17.243706', 'step': 10098, 'epoch': 2} {'type': 'loss', 'content': 0.13437208533287048, 'timestamp': '2025-10-01 04:25:17.245831', 'step': 10099, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:25:17.284379', 'step': 10099, 'epoch': 2} {'type': 'loss', 'content': 0.13547107577323914, 'timestamp': '2025-10-01 04:25:17.307931', 'step': 10100, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:17.371499', 'step': 10100, 'epoch': 2} {'type': 'loss', 'content': 0.19637976586818695, 'timestamp': '2025-10-01 04:25:17.373523', 'step': 10101, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:17.418662', 'step': 10101, 'epoch': 2} {'type': 'loss', 'content': 0.10459792613983154, 'timestamp': '2025-10-01 04:25:17.420662', 'step': 10102, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:25:17.461779', 'step': 10102, 'epoch': 2} {'type': 'loss', 'content': 0.0852414220571518, 'timestamp': '2025-10-01 04:25:17.464304', 'step': 10103, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:17.500186', 'step': 10103, 'epoch': 2} {'type': 'loss', 'content': 0.06047265976667404, 'timestamp': '2025-10-01 04:25:17.523843', 'step': 10104, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:25:17.561202', 'step': 10104, 'epoch': 2} {'type': 'loss', 'content': 0.06516550481319427, 'timestamp': '2025-10-01 04:25:17.563720', 'step': 10105, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:25:17.604166', 'step': 10105, 'epoch': 2} {'type': 'loss', 'content': 0.1085442304611206, 'timestamp': '2025-10-01 04:25:17.612155', 'step': 10106, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:17.650016', 'step': 10106, 'epoch': 2} {'type': 'loss', 'content': 0.10413119941949844, 'timestamp': '2025-10-01 04:25:17.652033', 'step': 10107, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:25:17.684561', 'step': 10107, 'epoch': 2} {'type': 'loss', 'content': 0.05477629974484444, 'timestamp': '2025-10-01 04:25:17.708051', 'step': 10108, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:17.754033', 'step': 10108, 'epoch': 2} {'type': 'loss', 'content': 0.09727384895086288, 'timestamp': '2025-10-01 04:25:17.757539', 'step': 10109, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:17.790956', 'step': 10109, 'epoch': 2} {'type': 'loss', 'content': 0.11820084601640701, 'timestamp': '2025-10-01 04:25:17.804499', 'step': 10110, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:17.838928', 'step': 10110, 'epoch': 2} {'type': 'loss', 'content': 0.11602885276079178, 'timestamp': '2025-10-01 04:25:17.841166', 'step': 10111, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:25:17.885003', 'step': 10111, 'epoch': 2} {'type': 'loss', 'content': 0.051054298877716064, 'timestamp': '2025-10-01 04:25:17.908648', 'step': 10112, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:17.942692', 'step': 10112, 'epoch': 2} {'type': 'loss', 'content': 0.1197441890835762, 'timestamp': '2025-10-01 04:25:17.949753', 'step': 10113, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:17.996597', 'step': 10113, 'epoch': 2} {'type': 'loss', 'content': 0.057025689631700516, 'timestamp': '2025-10-01 04:25:17.999974', 'step': 10114, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:25:18.033881', 'step': 10114, 'epoch': 2} {'type': 'loss', 'content': 0.09053555130958557, 'timestamp': '2025-10-01 04:25:18.036411', 'step': 10115, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:18.083173', 'step': 10115, 'epoch': 2} {'type': 'loss', 'content': 0.07075899839401245, 'timestamp': '2025-10-01 04:25:18.106729', 'step': 10116, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:25:18.147217', 'step': 10116, 'epoch': 2} {'type': 'loss', 'content': 0.08731389790773392, 'timestamp': '2025-10-01 04:25:18.149959', 'step': 10117, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:18.184979', 'step': 10117, 'epoch': 2} {'type': 'loss', 'content': 0.06496719270944595, 'timestamp': '2025-10-01 04:25:18.187209', 'step': 10118, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:18.230349', 'step': 10118, 'epoch': 2} {'type': 'loss', 'content': 0.051206063479185104, 'timestamp': '2025-10-01 04:25:18.245447', 'step': 10119, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:25:18.280449', 'step': 10119, 'epoch': 2} {'type': 'loss', 'content': 0.1709558069705963, 'timestamp': '2025-10-01 04:25:18.304126', 'step': 10120, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:18.338761', 'step': 10120, 'epoch': 2} {'type': 'loss', 'content': 0.2000325620174408, 'timestamp': '2025-10-01 04:25:18.340992', 'step': 10121, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:18.375608', 'step': 10121, 'epoch': 2} {'type': 'loss', 'content': 0.07723400741815567, 'timestamp': '2025-10-01 04:25:18.377679', 'step': 10122, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:25:18.411173', 'step': 10122, 'epoch': 2} {'type': 'loss', 'content': 0.090060755610466, 'timestamp': '2025-10-01 04:25:18.413663', 'step': 10123, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:18.448583', 'step': 10123, 'epoch': 2} {'type': 'loss', 'content': 0.08459330350160599, 'timestamp': '2025-10-01 04:25:18.472049', 'step': 10124, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:18.509734', 'step': 10124, 'epoch': 2} {'type': 'loss', 'content': 0.1450686752796173, 'timestamp': '2025-10-01 04:25:18.511769', 'step': 10125, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:18.552902', 'step': 10125, 'epoch': 2} {'type': 'loss', 'content': 0.08273953944444656, 'timestamp': '2025-10-01 04:25:18.554979', 'step': 10126, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:25:18.589076', 'step': 10126, 'epoch': 2} {'type': 'loss', 'content': 0.17064295709133148, 'timestamp': '2025-10-01 04:25:18.591429', 'step': 10127, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:25:18.623520', 'step': 10127, 'epoch': 2} {'type': 'loss', 'content': 0.1254589855670929, 'timestamp': '2025-10-01 04:25:18.647107', 'step': 10128, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:25:18.681353', 'step': 10128, 'epoch': 2} {'type': 'loss', 'content': 0.13375221192836761, 'timestamp': '2025-10-01 04:25:18.683359', 'step': 10129, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:25:18.733818', 'step': 10129, 'epoch': 2} {'type': 'loss', 'content': 0.09359622001647949, 'timestamp': '2025-10-01 04:25:18.735848', 'step': 10130, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:18.774585', 'step': 10130, 'epoch': 2} {'type': 'loss', 'content': 0.08320020884275436, 'timestamp': '2025-10-01 04:25:18.776745', 'step': 10131, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:18.810675', 'step': 10131, 'epoch': 2} {'type': 'loss', 'content': 0.06680118292570114, 'timestamp': '2025-10-01 04:25:18.834154', 'step': 10132, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:18.876685', 'step': 10132, 'epoch': 2} {'type': 'loss', 'content': 0.0778338611125946, 'timestamp': '2025-10-01 04:25:18.878648', 'step': 10133, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:18.924256', 'step': 10133, 'epoch': 2} {'type': 'loss', 'content': 0.09905383735895157, 'timestamp': '2025-10-01 04:25:18.926424', 'step': 10134, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:18.973822', 'step': 10134, 'epoch': 2} {'type': 'loss', 'content': 0.1196199432015419, 'timestamp': '2025-10-01 04:25:18.975992', 'step': 10135, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-10-01 04:25:19.023581', 'step': 10135, 'epoch': 2} {'type': 'loss', 'content': 0.06800206005573273, 'timestamp': '2025-10-01 04:25:19.049341', 'step': 10136, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:25:19.101046', 'step': 10136, 'epoch': 2} {'type': 'loss', 'content': 0.11111816018819809, 'timestamp': '2025-10-01 04:25:19.103096', 'step': 10137, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:19.140864', 'step': 10137, 'epoch': 2} {'type': 'loss', 'content': 0.08384546637535095, 'timestamp': '2025-10-01 04:25:19.142782', 'step': 10138, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:25:19.179312', 'step': 10138, 'epoch': 2} {'type': 'loss', 'content': 0.05434459075331688, 'timestamp': '2025-10-01 04:25:19.181493', 'step': 10139, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:19.216245', 'step': 10139, 'epoch': 2} {'type': 'loss', 'content': 0.11808750778436661, 'timestamp': '2025-10-01 04:25:19.239848', 'step': 10140, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:19.283455', 'step': 10140, 'epoch': 2} {'type': 'loss', 'content': 0.04006742313504219, 'timestamp': '2025-10-01 04:25:19.285736', 'step': 10141, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:25:19.319179', 'step': 10141, 'epoch': 2} {'type': 'loss', 'content': 0.10468713939189911, 'timestamp': '2025-10-01 04:25:19.331810', 'step': 10142, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:19.375749', 'step': 10142, 'epoch': 2} {'type': 'loss', 'content': 0.11709307134151459, 'timestamp': '2025-10-01 04:25:19.377826', 'step': 10143, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:25:19.415357', 'step': 10143, 'epoch': 2} {'type': 'loss', 'content': 0.120489202439785, 'timestamp': '2025-10-01 04:25:19.439121', 'step': 10144, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:19.483894', 'step': 10144, 'epoch': 2} {'type': 'loss', 'content': 0.04544341564178467, 'timestamp': '2025-10-01 04:25:19.486020', 'step': 10145, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:19.530464', 'step': 10145, 'epoch': 2} {'type': 'loss', 'content': 0.08231402188539505, 'timestamp': '2025-10-01 04:25:19.532786', 'step': 10146, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:19.571163', 'step': 10146, 'epoch': 2} {'type': 'loss', 'content': 0.10149431973695755, 'timestamp': '2025-10-01 04:25:19.573186', 'step': 10147, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:19.608997', 'step': 10147, 'epoch': 2} {'type': 'loss', 'content': 0.1204981654882431, 'timestamp': '2025-10-01 04:25:19.632597', 'step': 10148, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:19.677273', 'step': 10148, 'epoch': 2} {'type': 'loss', 'content': 0.0718948170542717, 'timestamp': '2025-10-01 04:25:19.679523', 'step': 10149, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:19.714768', 'step': 10149, 'epoch': 2} {'type': 'loss', 'content': 0.1153629720211029, 'timestamp': '2025-10-01 04:25:19.717565', 'step': 10150, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:19.753666', 'step': 10150, 'epoch': 2} {'type': 'loss', 'content': 0.1237855926156044, 'timestamp': '2025-10-01 04:25:19.755856', 'step': 10151, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:19.788357', 'step': 10151, 'epoch': 2} {'type': 'loss', 'content': 0.14131133258342743, 'timestamp': '2025-10-01 04:25:19.812106', 'step': 10152, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:19.846166', 'step': 10152, 'epoch': 2} {'type': 'loss', 'content': 0.03706926107406616, 'timestamp': '2025-10-01 04:25:19.848247', 'step': 10153, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:25:19.884451', 'step': 10153, 'epoch': 2} {'type': 'loss', 'content': 0.1137998104095459, 'timestamp': '2025-10-01 04:25:19.886792', 'step': 10154, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:19.922631', 'step': 10154, 'epoch': 2} {'type': 'loss', 'content': 0.08592702448368073, 'timestamp': '2025-10-01 04:25:19.924740', 'step': 10155, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:19.959371', 'step': 10155, 'epoch': 2} {'type': 'loss', 'content': 0.10206251591444016, 'timestamp': '2025-10-01 04:25:19.982818', 'step': 10156, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:20.028206', 'step': 10156, 'epoch': 2} {'type': 'loss', 'content': 0.10549178719520569, 'timestamp': '2025-10-01 04:25:20.031268', 'step': 10157, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:25:20.070456', 'step': 10157, 'epoch': 2} {'type': 'loss', 'content': 0.11409856379032135, 'timestamp': '2025-10-01 04:25:20.072943', 'step': 10158, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:25:20.107558', 'step': 10158, 'epoch': 2} {'type': 'loss', 'content': 0.15544778108596802, 'timestamp': '2025-10-01 04:25:20.110471', 'step': 10159, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:20.149276', 'step': 10159, 'epoch': 2} {'type': 'loss', 'content': 0.08172678202390671, 'timestamp': '2025-10-01 04:25:20.172885', 'step': 10160, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:25:20.220632', 'step': 10160, 'epoch': 2} {'type': 'loss', 'content': 0.04547491669654846, 'timestamp': '2025-10-01 04:25:20.222854', 'step': 10161, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:20.256529', 'step': 10161, 'epoch': 2} {'type': 'loss', 'content': 0.0982416644692421, 'timestamp': '2025-10-01 04:25:20.258490', 'step': 10162, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:20.302176', 'step': 10162, 'epoch': 2} {'type': 'loss', 'content': 0.18461352586746216, 'timestamp': '2025-10-01 04:25:20.304378', 'step': 10163, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:25:20.350813', 'step': 10163, 'epoch': 2} {'type': 'loss', 'content': 0.10906803607940674, 'timestamp': '2025-10-01 04:25:20.374597', 'step': 10164, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:25:20.407968', 'step': 10164, 'epoch': 2} {'type': 'loss', 'content': 0.16396163403987885, 'timestamp': '2025-10-01 04:25:20.410220', 'step': 10165, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:20.454157', 'step': 10165, 'epoch': 2} {'type': 'loss', 'content': 0.115662582218647, 'timestamp': '2025-10-01 04:25:20.456166', 'step': 10166, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:20.492562', 'step': 10166, 'epoch': 2} {'type': 'loss', 'content': 0.18050242960453033, 'timestamp': '2025-10-01 04:25:20.494585', 'step': 10167, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:25:20.542694', 'step': 10167, 'epoch': 2} {'type': 'loss', 'content': 0.13888780772686005, 'timestamp': '2025-10-01 04:25:20.566303', 'step': 10168, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:25:20.602153', 'step': 10168, 'epoch': 2} {'type': 'loss', 'content': 0.10613125562667847, 'timestamp': '2025-10-01 04:25:20.604365', 'step': 10169, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:20.643857', 'step': 10169, 'epoch': 2} {'type': 'loss', 'content': 0.21682225167751312, 'timestamp': '2025-10-01 04:25:20.646314', 'step': 10170, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:20.683062', 'step': 10170, 'epoch': 2} {'type': 'loss', 'content': 0.14961500465869904, 'timestamp': '2025-10-01 04:25:20.685166', 'step': 10171, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:20.718685', 'step': 10171, 'epoch': 2} {'type': 'loss', 'content': 0.12858307361602783, 'timestamp': '2025-10-01 04:25:20.742451', 'step': 10172, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:25:20.778748', 'step': 10172, 'epoch': 2} {'type': 'loss', 'content': 0.1832311749458313, 'timestamp': '2025-10-01 04:25:20.780871', 'step': 10173, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:25:20.831263', 'step': 10173, 'epoch': 2} {'type': 'loss', 'content': 0.05601828172802925, 'timestamp': '2025-10-01 04:25:20.834243', 'step': 10174, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:25:20.869394', 'step': 10174, 'epoch': 2} {'type': 'loss', 'content': 0.1407357156276703, 'timestamp': '2025-10-01 04:25:20.871832', 'step': 10175, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:25:20.905938', 'step': 10175, 'epoch': 2} {'type': 'loss', 'content': 0.11185327917337418, 'timestamp': '2025-10-01 04:25:20.929835', 'step': 10176, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:20.962756', 'step': 10176, 'epoch': 2} {'type': 'loss', 'content': 0.1236388087272644, 'timestamp': '2025-10-01 04:25:20.964875', 'step': 10177, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:20.999810', 'step': 10177, 'epoch': 2} {'type': 'loss', 'content': 0.16138774156570435, 'timestamp': '2025-10-01 04:25:21.001960', 'step': 10178, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:25:21.036615', 'step': 10178, 'epoch': 2} {'type': 'loss', 'content': 0.05810799449682236, 'timestamp': '2025-10-01 04:25:21.048761', 'step': 10179, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:21.083147', 'step': 10179, 'epoch': 2} {'type': 'loss', 'content': 0.23481477797031403, 'timestamp': '2025-10-01 04:25:21.108428', 'step': 10180, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:25:21.142530', 'step': 10180, 'epoch': 2} {'type': 'loss', 'content': 0.147027850151062, 'timestamp': '2025-10-01 04:25:21.145323', 'step': 10181, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:21.180339', 'step': 10181, 'epoch': 2} {'type': 'loss', 'content': 0.14047998189926147, 'timestamp': '2025-10-01 04:25:21.183533', 'step': 10182, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:25:21.222509', 'step': 10182, 'epoch': 2} {'type': 'loss', 'content': 0.1355777382850647, 'timestamp': '2025-10-01 04:25:21.224765', 'step': 10183, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:21.264032', 'step': 10183, 'epoch': 2} {'type': 'loss', 'content': 0.08872193843126297, 'timestamp': '2025-10-01 04:25:21.287827', 'step': 10184, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:21.334192', 'step': 10184, 'epoch': 2} {'type': 'loss', 'content': 0.16792334616184235, 'timestamp': '2025-10-01 04:25:21.336529', 'step': 10185, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:21.373027', 'step': 10185, 'epoch': 2} {'type': 'loss', 'content': 0.07354714721441269, 'timestamp': '2025-10-01 04:25:21.375430', 'step': 10186, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:25:21.422501', 'step': 10186, 'epoch': 2} {'type': 'loss', 'content': 0.1255040019750595, 'timestamp': '2025-10-01 04:25:21.424787', 'step': 10187, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:21.458964', 'step': 10187, 'epoch': 2} {'type': 'loss', 'content': 0.13674740493297577, 'timestamp': '2025-10-01 04:25:21.482613', 'step': 10188, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:21.517409', 'step': 10188, 'epoch': 2} {'type': 'loss', 'content': 0.10640067607164383, 'timestamp': '2025-10-01 04:25:21.520327', 'step': 10189, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:21.556010', 'step': 10189, 'epoch': 2} {'type': 'loss', 'content': 0.08285065740346909, 'timestamp': '2025-10-01 04:25:21.558541', 'step': 10190, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:21.592739', 'step': 10190, 'epoch': 2} {'type': 'loss', 'content': 0.23919284343719482, 'timestamp': '2025-10-01 04:25:21.595325', 'step': 10191, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:21.628533', 'step': 10191, 'epoch': 2} {'type': 'loss', 'content': 0.10621797293424606, 'timestamp': '2025-10-01 04:25:21.652664', 'step': 10192, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:25:21.697699', 'step': 10192, 'epoch': 2} {'type': 'loss', 'content': 0.10241562873125076, 'timestamp': '2025-10-01 04:25:21.699926', 'step': 10193, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:25:21.735687', 'step': 10193, 'epoch': 2} {'type': 'loss', 'content': 0.1260179728269577, 'timestamp': '2025-10-01 04:25:21.738059', 'step': 10194, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:21.774472', 'step': 10194, 'epoch': 2} {'type': 'loss', 'content': 0.15504829585552216, 'timestamp': '2025-10-01 04:25:21.787171', 'step': 10195, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:25:21.822862', 'step': 10195, 'epoch': 2} {'type': 'loss', 'content': 0.14861169457435608, 'timestamp': '2025-10-01 04:25:21.846713', 'step': 10196, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:25:21.894146', 'step': 10196, 'epoch': 2} {'type': 'loss', 'content': 0.05616246163845062, 'timestamp': '2025-10-01 04:25:21.896769', 'step': 10197, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:25:21.945855', 'step': 10197, 'epoch': 2} {'type': 'loss', 'content': 0.14589276909828186, 'timestamp': '2025-10-01 04:25:21.949870', 'step': 10198, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:25:21.995228', 'step': 10198, 'epoch': 2} {'type': 'loss', 'content': 0.09293349087238312, 'timestamp': '2025-10-01 04:25:21.997318', 'step': 10199, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:22.032273', 'step': 10199, 'epoch': 2} {'type': 'loss', 'content': 0.1468292474746704, 'timestamp': '2025-10-01 04:25:22.056083', 'step': 10200, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:25:22.090280', 'step': 10200, 'epoch': 2} {'type': 'loss', 'content': 0.16572323441505432, 'timestamp': '2025-10-01 04:25:22.103551', 'step': 10201, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:25:22.149266', 'step': 10201, 'epoch': 2} {'type': 'loss', 'content': 0.06646732985973358, 'timestamp': '2025-10-01 04:25:22.151447', 'step': 10202, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:22.185820', 'step': 10202, 'epoch': 2} {'type': 'loss', 'content': 0.1173349991440773, 'timestamp': '2025-10-01 04:25:22.190653', 'step': 10203, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:25:22.225344', 'step': 10203, 'epoch': 2} {'type': 'loss', 'content': 0.08362670242786407, 'timestamp': '2025-10-01 04:25:22.250317', 'step': 10204, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:25:22.283561', 'step': 10204, 'epoch': 2} {'type': 'loss', 'content': 0.13258275389671326, 'timestamp': '2025-10-01 04:25:22.285602', 'step': 10205, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:22.320311', 'step': 10205, 'epoch': 2} {'type': 'loss', 'content': 0.07278408110141754, 'timestamp': '2025-10-01 04:25:22.322267', 'step': 10206, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:25:22.358350', 'step': 10206, 'epoch': 2} {'type': 'loss', 'content': 0.08139999210834503, 'timestamp': '2025-10-01 04:25:22.360981', 'step': 10207, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:22.404304', 'step': 10207, 'epoch': 2} {'type': 'loss', 'content': 0.09112673252820969, 'timestamp': '2025-10-01 04:25:22.439516', 'step': 10208, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-10-01 04:25:22.483247', 'step': 10208, 'epoch': 2} {'type': 'loss', 'content': 0.10776876658201218, 'timestamp': '2025-10-01 04:25:22.488002', 'step': 10209, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:22.521633', 'step': 10209, 'epoch': 2} {'type': 'loss', 'content': 0.10155614465475082, 'timestamp': '2025-10-01 04:25:22.524485', 'step': 10210, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:22.568396', 'step': 10210, 'epoch': 2} {'type': 'loss', 'content': 0.0890759825706482, 'timestamp': '2025-10-01 04:25:22.570509', 'step': 10211, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:25:22.618885', 'step': 10211, 'epoch': 2} {'type': 'loss', 'content': 0.06185325235128403, 'timestamp': '2025-10-01 04:25:22.642773', 'step': 10212, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:25:22.683677', 'step': 10212, 'epoch': 2} {'type': 'loss', 'content': 0.06085656210780144, 'timestamp': '2025-10-01 04:25:22.685848', 'step': 10213, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:22.721768', 'step': 10213, 'epoch': 2} {'type': 'loss', 'content': 0.19488508999347687, 'timestamp': '2025-10-01 04:25:22.723877', 'step': 10214, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:25:22.767208', 'step': 10214, 'epoch': 2} {'type': 'loss', 'content': 0.15600861608982086, 'timestamp': '2025-10-01 04:25:22.769286', 'step': 10215, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:22.804537', 'step': 10215, 'epoch': 2} {'type': 'loss', 'content': 0.09402585029602051, 'timestamp': '2025-10-01 04:25:22.828268', 'step': 10216, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:25:22.868464', 'step': 10216, 'epoch': 2} {'type': 'loss', 'content': 0.10343122482299805, 'timestamp': '2025-10-01 04:25:22.871320', 'step': 10217, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:22.907513', 'step': 10217, 'epoch': 2} {'type': 'loss', 'content': 0.1635993868112564, 'timestamp': '2025-10-01 04:25:22.919576', 'step': 10218, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:25:22.960135', 'step': 10218, 'epoch': 2} {'type': 'loss', 'content': 0.11014344543218613, 'timestamp': '2025-10-01 04:25:22.962508', 'step': 10219, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:25:22.996985', 'step': 10219, 'epoch': 2} {'type': 'loss', 'content': 0.056605711579322815, 'timestamp': '2025-10-01 04:25:23.020464', 'step': 10220, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:23.057202', 'step': 10220, 'epoch': 2} {'type': 'loss', 'content': 0.1279744654893875, 'timestamp': '2025-10-01 04:25:23.059373', 'step': 10221, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:23.094645', 'step': 10221, 'epoch': 2} {'type': 'loss', 'content': 0.08427765220403671, 'timestamp': '2025-10-01 04:25:23.096867', 'step': 10222, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:25:23.132579', 'step': 10222, 'epoch': 2} {'type': 'loss', 'content': 0.07112888246774673, 'timestamp': '2025-10-01 04:25:23.134751', 'step': 10223, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:25:23.186646', 'step': 10223, 'epoch': 2} {'type': 'loss', 'content': 0.14956389367580414, 'timestamp': '2025-10-01 04:25:23.210148', 'step': 10224, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:23.256616', 'step': 10224, 'epoch': 2} {'type': 'loss', 'content': 0.13852287828922272, 'timestamp': '2025-10-01 04:25:23.259010', 'step': 10225, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:25:23.294220', 'step': 10225, 'epoch': 2} {'type': 'loss', 'content': 0.16004692018032074, 'timestamp': '2025-10-01 04:25:23.297115', 'step': 10226, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:25:23.334324', 'step': 10226, 'epoch': 2} {'type': 'loss', 'content': 0.13484759628772736, 'timestamp': '2025-10-01 04:25:23.337295', 'step': 10227, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:23.373567', 'step': 10227, 'epoch': 2} {'type': 'loss', 'content': 0.07932569086551666, 'timestamp': '2025-10-01 04:25:23.397061', 'step': 10228, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:23.431309', 'step': 10228, 'epoch': 2} {'type': 'loss', 'content': 0.1325630098581314, 'timestamp': '2025-10-01 04:25:23.433299', 'step': 10229, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:23.467945', 'step': 10229, 'epoch': 2} {'type': 'loss', 'content': 0.17109133303165436, 'timestamp': '2025-10-01 04:25:23.470112', 'step': 10230, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:23.505286', 'step': 10230, 'epoch': 2} {'type': 'loss', 'content': 0.06766526401042938, 'timestamp': '2025-10-01 04:25:23.507702', 'step': 10231, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:23.541235', 'step': 10231, 'epoch': 2} {'type': 'loss', 'content': 0.19608266651630402, 'timestamp': '2025-10-01 04:25:23.565628', 'step': 10232, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:25:23.602885', 'step': 10232, 'epoch': 2} {'type': 'loss', 'content': 0.05436500534415245, 'timestamp': '2025-10-01 04:25:23.605017', 'step': 10233, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:23.639601', 'step': 10233, 'epoch': 2} {'type': 'loss', 'content': 0.1617128998041153, 'timestamp': '2025-10-01 04:25:23.652474', 'step': 10234, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:25:23.688455', 'step': 10234, 'epoch': 2} {'type': 'loss', 'content': 0.12509647011756897, 'timestamp': '2025-10-01 04:25:23.691175', 'step': 10235, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:23.726851', 'step': 10235, 'epoch': 2} {'type': 'loss', 'content': 0.0796138122677803, 'timestamp': '2025-10-01 04:25:23.750519', 'step': 10236, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:25:23.785153', 'step': 10236, 'epoch': 2} {'type': 'loss', 'content': 0.1383683681488037, 'timestamp': '2025-10-01 04:25:23.787283', 'step': 10237, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:23.822290', 'step': 10237, 'epoch': 2} {'type': 'loss', 'content': 0.11526693403720856, 'timestamp': '2025-10-01 04:25:23.824538', 'step': 10238, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:23.859691', 'step': 10238, 'epoch': 2} {'type': 'loss', 'content': 0.07373309880495071, 'timestamp': '2025-10-01 04:25:23.863144', 'step': 10239, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:23.898479', 'step': 10239, 'epoch': 2} {'type': 'loss', 'content': 0.16778770089149475, 'timestamp': '2025-10-01 04:25:23.921990', 'step': 10240, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:23.957495', 'step': 10240, 'epoch': 2} {'type': 'loss', 'content': 0.07388961315155029, 'timestamp': '2025-10-01 04:25:23.959637', 'step': 10241, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:23.996386', 'step': 10241, 'epoch': 2} {'type': 'loss', 'content': 0.13108883798122406, 'timestamp': '2025-10-01 04:25:23.998351', 'step': 10242, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:24.031270', 'step': 10242, 'epoch': 2} {'type': 'loss', 'content': 0.15502005815505981, 'timestamp': '2025-10-01 04:25:24.033227', 'step': 10243, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:24.093153', 'step': 10243, 'epoch': 2} {'type': 'loss', 'content': 0.14115609228610992, 'timestamp': '2025-10-01 04:25:24.116804', 'step': 10244, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:24.165404', 'step': 10244, 'epoch': 2} {'type': 'loss', 'content': 0.05265970155596733, 'timestamp': '2025-10-01 04:25:24.167544', 'step': 10245, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:24.202970', 'step': 10245, 'epoch': 2} {'type': 'loss', 'content': 0.20941434800624847, 'timestamp': '2025-10-01 04:25:24.204979', 'step': 10246, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:24.240221', 'step': 10246, 'epoch': 2} {'type': 'loss', 'content': 0.1258241981267929, 'timestamp': '2025-10-01 04:25:24.243905', 'step': 10247, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:24.277709', 'step': 10247, 'epoch': 2} {'type': 'loss', 'content': 0.09225258231163025, 'timestamp': '2025-10-01 04:25:24.301272', 'step': 10248, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:25:24.335801', 'step': 10248, 'epoch': 2} {'type': 'loss', 'content': 0.08989708125591278, 'timestamp': '2025-10-01 04:25:24.337994', 'step': 10249, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:24.374282', 'step': 10249, 'epoch': 2} {'type': 'loss', 'content': 0.17427712678909302, 'timestamp': '2025-10-01 04:25:24.376816', 'step': 10250, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:24.419566', 'step': 10250, 'epoch': 2} {'type': 'loss', 'content': 0.08321802318096161, 'timestamp': '2025-10-01 04:25:24.421874', 'step': 10251, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:25:24.457987', 'step': 10251, 'epoch': 2} {'type': 'loss', 'content': 0.2746492326259613, 'timestamp': '2025-10-01 04:25:24.481562', 'step': 10252, 'epoch': 2} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 949202279808}], 'timestamp': '2025-10-01 04:25:36.765916', 'step': 10252, 'epoch': 2} {'type': 'pplx', 'content': 14075.245506532528, 'timestamp': '2025-10-01 04:25:36.769280', 'step': 10252, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:25:36.811570', 'step': 10252, 'epoch': 2} {'type': 'loss', 'content': 0.12018625438213348, 'timestamp': '2025-10-01 04:25:36.813780', 'step': 10253, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:25:36.863441', 'step': 10253, 'epoch': 2} {'type': 'loss', 'content': 0.13116593658924103, 'timestamp': '2025-10-01 04:25:36.865834', 'step': 10254, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:36.901007', 'step': 10254, 'epoch': 2} {'type': 'loss', 'content': 0.08539248257875443, 'timestamp': '2025-10-01 04:25:36.903167', 'step': 10255, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:36.938637', 'step': 10255, 'epoch': 2} {'type': 'loss', 'content': 0.08105474710464478, 'timestamp': '2025-10-01 04:25:36.962383', 'step': 10256, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:25:36.998432', 'step': 10256, 'epoch': 2} {'type': 'loss', 'content': 0.11061038821935654, 'timestamp': '2025-10-01 04:25:37.000721', 'step': 10257, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:25:37.035744', 'step': 10257, 'epoch': 2} {'type': 'loss', 'content': 0.13064734637737274, 'timestamp': '2025-10-01 04:25:37.042594', 'step': 10258, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:25:37.078255', 'step': 10258, 'epoch': 2} {'type': 'loss', 'content': 0.037404876202344894, 'timestamp': '2025-10-01 04:25:37.080602', 'step': 10259, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:37.116774', 'step': 10259, 'epoch': 2} {'type': 'loss', 'content': 0.10256779938936234, 'timestamp': '2025-10-01 04:25:37.140643', 'step': 10260, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:25:37.185368', 'step': 10260, 'epoch': 2} {'type': 'loss', 'content': 0.05128825828433037, 'timestamp': '2025-10-01 04:25:37.190116', 'step': 10261, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:37.235212', 'step': 10261, 'epoch': 2} {'type': 'loss', 'content': 0.12443704158067703, 'timestamp': '2025-10-01 04:25:37.237472', 'step': 10262, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:25:37.272508', 'step': 10262, 'epoch': 2} {'type': 'loss', 'content': 0.051489293575286865, 'timestamp': '2025-10-01 04:25:37.274673', 'step': 10263, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:37.309026', 'step': 10263, 'epoch': 2} {'type': 'loss', 'content': 0.130940780043602, 'timestamp': '2025-10-01 04:25:37.332723', 'step': 10264, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:37.372444', 'step': 10264, 'epoch': 2} {'type': 'loss', 'content': 0.14345380663871765, 'timestamp': '2025-10-01 04:25:37.374649', 'step': 10265, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:37.408326', 'step': 10265, 'epoch': 2} {'type': 'loss', 'content': 0.08105748146772385, 'timestamp': '2025-10-01 04:25:37.410635', 'step': 10266, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:25:37.446728', 'step': 10266, 'epoch': 2} {'type': 'loss', 'content': 0.17571304738521576, 'timestamp': '2025-10-01 04:25:37.449482', 'step': 10267, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:37.494858', 'step': 10267, 'epoch': 2} {'type': 'loss', 'content': 0.1596740335226059, 'timestamp': '2025-10-01 04:25:37.518542', 'step': 10268, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:25:37.553853', 'step': 10268, 'epoch': 2} {'type': 'loss', 'content': 0.15891683101654053, 'timestamp': '2025-10-01 04:25:37.556329', 'step': 10269, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:37.591430', 'step': 10269, 'epoch': 2} {'type': 'loss', 'content': 0.1430785357952118, 'timestamp': '2025-10-01 04:25:37.593972', 'step': 10270, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:37.633852', 'step': 10270, 'epoch': 2} {'type': 'loss', 'content': 0.06756716966629028, 'timestamp': '2025-10-01 04:25:37.636167', 'step': 10271, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:37.682576', 'step': 10271, 'epoch': 2} {'type': 'loss', 'content': 0.04099056497216225, 'timestamp': '2025-10-01 04:25:37.706244', 'step': 10272, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:25:37.741567', 'step': 10272, 'epoch': 2} {'type': 'loss', 'content': 0.10627268999814987, 'timestamp': '2025-10-01 04:25:37.744137', 'step': 10273, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:37.780088', 'step': 10273, 'epoch': 2} {'type': 'loss', 'content': 0.0683092251420021, 'timestamp': '2025-10-01 04:25:37.783423', 'step': 10274, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:37.818483', 'step': 10274, 'epoch': 2} {'type': 'loss', 'content': 0.09104187041521072, 'timestamp': '2025-10-01 04:25:37.820850', 'step': 10275, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:37.859370', 'step': 10275, 'epoch': 2} {'type': 'loss', 'content': 0.12315879762172699, 'timestamp': '2025-10-01 04:25:37.883561', 'step': 10276, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:25:37.933907', 'step': 10276, 'epoch': 2} {'type': 'loss', 'content': 0.07960564643144608, 'timestamp': '2025-10-01 04:25:37.936654', 'step': 10277, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:37.983569', 'step': 10277, 'epoch': 2} {'type': 'loss', 'content': 0.07064612954854965, 'timestamp': '2025-10-01 04:25:37.986339', 'step': 10278, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:25:38.022253', 'step': 10278, 'epoch': 2} {'type': 'loss', 'content': 0.2025952786207199, 'timestamp': '2025-10-01 04:25:38.025090', 'step': 10279, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:38.064235', 'step': 10279, 'epoch': 2} {'type': 'loss', 'content': 0.134791299700737, 'timestamp': '2025-10-01 04:25:38.091730', 'step': 10280, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:38.145529', 'step': 10280, 'epoch': 2} {'type': 'loss', 'content': 0.1254926323890686, 'timestamp': '2025-10-01 04:25:38.148274', 'step': 10281, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:38.182289', 'step': 10281, 'epoch': 2} {'type': 'loss', 'content': 0.19523757696151733, 'timestamp': '2025-10-01 04:25:38.184562', 'step': 10282, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:25:38.224094', 'step': 10282, 'epoch': 2} {'type': 'loss', 'content': 0.10495854169130325, 'timestamp': '2025-10-01 04:25:38.226749', 'step': 10283, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:38.264266', 'step': 10283, 'epoch': 2} {'type': 'loss', 'content': 0.07030065357685089, 'timestamp': '2025-10-01 04:25:38.288501', 'step': 10284, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:38.333992', 'step': 10284, 'epoch': 2} {'type': 'loss', 'content': 0.12055987864732742, 'timestamp': '2025-10-01 04:25:38.336649', 'step': 10285, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:25:38.377132', 'step': 10285, 'epoch': 2} {'type': 'loss', 'content': 0.09921757876873016, 'timestamp': '2025-10-01 04:25:38.379673', 'step': 10286, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:38.425728', 'step': 10286, 'epoch': 2} {'type': 'loss', 'content': 0.11538629978895187, 'timestamp': '2025-10-01 04:25:38.431088', 'step': 10287, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:25:38.470288', 'step': 10287, 'epoch': 2} {'type': 'loss', 'content': 0.058588720858097076, 'timestamp': '2025-10-01 04:25:38.494753', 'step': 10288, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:38.545124', 'step': 10288, 'epoch': 2} {'type': 'loss', 'content': 0.027845490723848343, 'timestamp': '2025-10-01 04:25:38.548561', 'step': 10289, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:38.581371', 'step': 10289, 'epoch': 2} {'type': 'loss', 'content': 0.19269195199012756, 'timestamp': '2025-10-01 04:25:38.583981', 'step': 10290, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:38.632217', 'step': 10290, 'epoch': 2} {'type': 'loss', 'content': 0.1286298930644989, 'timestamp': '2025-10-01 04:25:38.634848', 'step': 10291, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:38.669919', 'step': 10291, 'epoch': 2} {'type': 'loss', 'content': 0.09587082266807556, 'timestamp': '2025-10-01 04:25:38.694022', 'step': 10292, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:38.740253', 'step': 10292, 'epoch': 2} {'type': 'loss', 'content': 0.07248979806900024, 'timestamp': '2025-10-01 04:25:38.743036', 'step': 10293, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:38.792484', 'step': 10293, 'epoch': 2} {'type': 'loss', 'content': 0.07639582455158234, 'timestamp': '2025-10-01 04:25:38.794866', 'step': 10294, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:25:38.837788', 'step': 10294, 'epoch': 2} {'type': 'loss', 'content': 0.049609821289777756, 'timestamp': '2025-10-01 04:25:38.840272', 'step': 10295, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:38.877273', 'step': 10295, 'epoch': 2} {'type': 'loss', 'content': 0.10954932868480682, 'timestamp': '2025-10-01 04:25:38.900822', 'step': 10296, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:38.938892', 'step': 10296, 'epoch': 2} {'type': 'loss', 'content': 0.15355496108531952, 'timestamp': '2025-10-01 04:25:38.942905', 'step': 10297, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:38.990201', 'step': 10297, 'epoch': 2} {'type': 'loss', 'content': 0.15355414152145386, 'timestamp': '2025-10-01 04:25:38.992497', 'step': 10298, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:39.031167', 'step': 10298, 'epoch': 2} {'type': 'loss', 'content': 0.09238556027412415, 'timestamp': '2025-10-01 04:25:39.033253', 'step': 10299, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:39.073630', 'step': 10299, 'epoch': 2} {'type': 'loss', 'content': 0.13041388988494873, 'timestamp': '2025-10-01 04:25:39.097451', 'step': 10300, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:25:39.131725', 'step': 10300, 'epoch': 2} {'type': 'loss', 'content': 0.1888030767440796, 'timestamp': '2025-10-01 04:25:39.133917', 'step': 10301, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:39.173440', 'step': 10301, 'epoch': 2} {'type': 'loss', 'content': 0.0932236984372139, 'timestamp': '2025-10-01 04:25:39.175669', 'step': 10302, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:25:39.213433', 'step': 10302, 'epoch': 2} {'type': 'loss', 'content': 0.0705583393573761, 'timestamp': '2025-10-01 04:25:39.215929', 'step': 10303, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:25:39.263463', 'step': 10303, 'epoch': 2} {'type': 'loss', 'content': 0.11164292693138123, 'timestamp': '2025-10-01 04:25:39.287319', 'step': 10304, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:39.329646', 'step': 10304, 'epoch': 2} {'type': 'loss', 'content': 0.07116717845201492, 'timestamp': '2025-10-01 04:25:39.332527', 'step': 10305, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:39.383311', 'step': 10305, 'epoch': 2} {'type': 'loss', 'content': 0.1896287351846695, 'timestamp': '2025-10-01 04:25:39.391062', 'step': 10306, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:39.426683', 'step': 10306, 'epoch': 2} {'type': 'loss', 'content': 0.12936778366565704, 'timestamp': '2025-10-01 04:25:39.428981', 'step': 10307, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:39.480942', 'step': 10307, 'epoch': 2} {'type': 'loss', 'content': 0.0688994973897934, 'timestamp': '2025-10-01 04:25:39.504477', 'step': 10308, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:39.539909', 'step': 10308, 'epoch': 2} {'type': 'loss', 'content': 0.13086959719657898, 'timestamp': '2025-10-01 04:25:39.543266', 'step': 10309, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:39.580482', 'step': 10309, 'epoch': 2} {'type': 'loss', 'content': 0.10679548978805542, 'timestamp': '2025-10-01 04:25:39.582717', 'step': 10310, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:39.618411', 'step': 10310, 'epoch': 2} {'type': 'loss', 'content': 0.08701100945472717, 'timestamp': '2025-10-01 04:25:39.621474', 'step': 10311, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:25:39.671238', 'step': 10311, 'epoch': 2} {'type': 'loss', 'content': 0.08241571485996246, 'timestamp': '2025-10-01 04:25:39.696108', 'step': 10312, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:25:39.757061', 'step': 10312, 'epoch': 2} {'type': 'loss', 'content': 0.09397745877504349, 'timestamp': '2025-10-01 04:25:39.759812', 'step': 10313, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:39.797812', 'step': 10313, 'epoch': 2} {'type': 'loss', 'content': 0.09582450985908508, 'timestamp': '2025-10-01 04:25:39.820018', 'step': 10314, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:39.883570', 'step': 10314, 'epoch': 2} {'type': 'loss', 'content': 0.1038142740726471, 'timestamp': '2025-10-01 04:25:39.885774', 'step': 10315, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:39.933572', 'step': 10315, 'epoch': 2} {'type': 'loss', 'content': 0.09016090631484985, 'timestamp': '2025-10-01 04:25:39.958347', 'step': 10316, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:25:40.004867', 'step': 10316, 'epoch': 2} {'type': 'loss', 'content': 0.14205867052078247, 'timestamp': '2025-10-01 04:25:40.007051', 'step': 10317, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:25:40.041862', 'step': 10317, 'epoch': 2} {'type': 'loss', 'content': 0.09157495200634003, 'timestamp': '2025-10-01 04:25:40.046981', 'step': 10318, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:25:40.083094', 'step': 10318, 'epoch': 2} {'type': 'loss', 'content': 0.2018614560365677, 'timestamp': '2025-10-01 04:25:40.085236', 'step': 10319, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:40.118870', 'step': 10319, 'epoch': 2} {'type': 'loss', 'content': 0.1494194120168686, 'timestamp': '2025-10-01 04:25:40.146753', 'step': 10320, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:40.180868', 'step': 10320, 'epoch': 2} {'type': 'loss', 'content': 0.10365256667137146, 'timestamp': '2025-10-01 04:25:40.183172', 'step': 10321, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:40.231970', 'step': 10321, 'epoch': 2} {'type': 'loss', 'content': 0.14959585666656494, 'timestamp': '2025-10-01 04:25:40.241976', 'step': 10322, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:40.277444', 'step': 10322, 'epoch': 2} {'type': 'loss', 'content': 0.0533229224383831, 'timestamp': '2025-10-01 04:25:40.279568', 'step': 10323, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:40.313557', 'step': 10323, 'epoch': 2} {'type': 'loss', 'content': 0.09923360496759415, 'timestamp': '2025-10-01 04:25:40.337129', 'step': 10324, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:40.384349', 'step': 10324, 'epoch': 2} {'type': 'loss', 'content': 0.1078595519065857, 'timestamp': '2025-10-01 04:25:40.386463', 'step': 10325, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:25:40.433468', 'step': 10325, 'epoch': 2} {'type': 'loss', 'content': 0.13950450718402863, 'timestamp': '2025-10-01 04:25:40.447359', 'step': 10326, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:40.494452', 'step': 10326, 'epoch': 2} {'type': 'loss', 'content': 0.1141873449087143, 'timestamp': '2025-10-01 04:25:40.496642', 'step': 10327, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:40.530594', 'step': 10327, 'epoch': 2} {'type': 'loss', 'content': 0.10425946116447449, 'timestamp': '2025-10-01 04:25:40.554562', 'step': 10328, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:25:40.591804', 'step': 10328, 'epoch': 2} {'type': 'loss', 'content': 0.18589068949222565, 'timestamp': '2025-10-01 04:25:40.594039', 'step': 10329, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:40.648879', 'step': 10329, 'epoch': 2} {'type': 'loss', 'content': 0.06027606129646301, 'timestamp': '2025-10-01 04:25:40.667697', 'step': 10330, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:40.703198', 'step': 10330, 'epoch': 2} {'type': 'loss', 'content': 0.08647865056991577, 'timestamp': '2025-10-01 04:25:40.705993', 'step': 10331, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:40.756913', 'step': 10331, 'epoch': 2} {'type': 'loss', 'content': 0.08215084671974182, 'timestamp': '2025-10-01 04:25:40.785829', 'step': 10332, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:40.822791', 'step': 10332, 'epoch': 2} {'type': 'loss', 'content': 0.15094870328903198, 'timestamp': '2025-10-01 04:25:40.825271', 'step': 10333, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:40.861595', 'step': 10333, 'epoch': 2} {'type': 'loss', 'content': 0.07720620185136795, 'timestamp': '2025-10-01 04:25:40.864701', 'step': 10334, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:25:40.900056', 'step': 10334, 'epoch': 2} {'type': 'loss', 'content': 0.07478175312280655, 'timestamp': '2025-10-01 04:25:40.902155', 'step': 10335, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:25:40.938944', 'step': 10335, 'epoch': 2} {'type': 'loss', 'content': 0.13253377377986908, 'timestamp': '2025-10-01 04:25:40.962485', 'step': 10336, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:41.000174', 'step': 10336, 'epoch': 2} {'type': 'loss', 'content': 0.17177417874336243, 'timestamp': '2025-10-01 04:25:41.002423', 'step': 10337, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:25:41.043926', 'step': 10337, 'epoch': 2} {'type': 'loss', 'content': 0.1588832139968872, 'timestamp': '2025-10-01 04:25:41.046044', 'step': 10338, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:25:41.081533', 'step': 10338, 'epoch': 2} {'type': 'loss', 'content': 0.10097452253103256, 'timestamp': '2025-10-01 04:25:41.083972', 'step': 10339, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:25:41.138270', 'step': 10339, 'epoch': 2} {'type': 'loss', 'content': 0.13080567121505737, 'timestamp': '2025-10-01 04:25:41.161876', 'step': 10340, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:25:41.217530', 'step': 10340, 'epoch': 2} {'type': 'loss', 'content': 0.12476124614477158, 'timestamp': '2025-10-01 04:25:41.219742', 'step': 10341, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:41.256121', 'step': 10341, 'epoch': 2} {'type': 'loss', 'content': 0.17107483744621277, 'timestamp': '2025-10-01 04:25:41.258241', 'step': 10342, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:25:41.330221', 'step': 10342, 'epoch': 2} {'type': 'loss', 'content': 0.10949107259511948, 'timestamp': '2025-10-01 04:25:41.332531', 'step': 10343, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:25:41.371710', 'step': 10343, 'epoch': 2} {'type': 'loss', 'content': 0.1885925978422165, 'timestamp': '2025-10-01 04:25:41.395337', 'step': 10344, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:25:41.430463', 'step': 10344, 'epoch': 2} {'type': 'loss', 'content': 0.07320433109998703, 'timestamp': '2025-10-01 04:25:41.432881', 'step': 10345, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:41.468296', 'step': 10345, 'epoch': 2} {'type': 'loss', 'content': 0.04730874300003052, 'timestamp': '2025-10-01 04:25:41.476523', 'step': 10346, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:41.510689', 'step': 10346, 'epoch': 2} {'type': 'loss', 'content': 0.11772885173559189, 'timestamp': '2025-10-01 04:25:41.512981', 'step': 10347, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:41.564698', 'step': 10347, 'epoch': 2} {'type': 'loss', 'content': 0.08576364070177078, 'timestamp': '2025-10-01 04:25:41.588431', 'step': 10348, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:25:41.622350', 'step': 10348, 'epoch': 2} {'type': 'loss', 'content': 0.11940990388393402, 'timestamp': '2025-10-01 04:25:41.625109', 'step': 10349, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:41.658511', 'step': 10349, 'epoch': 2} {'type': 'loss', 'content': 0.09783482551574707, 'timestamp': '2025-10-01 04:25:41.660599', 'step': 10350, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:25:41.697397', 'step': 10350, 'epoch': 2} {'type': 'loss', 'content': 0.07467932254076004, 'timestamp': '2025-10-01 04:25:41.699580', 'step': 10351, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:25:41.733499', 'step': 10351, 'epoch': 2} {'type': 'loss', 'content': 0.0716330036520958, 'timestamp': '2025-10-01 04:25:41.757144', 'step': 10352, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:41.808308', 'step': 10352, 'epoch': 2} {'type': 'loss', 'content': 0.07749266922473907, 'timestamp': '2025-10-01 04:25:41.812771', 'step': 10353, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:41.848816', 'step': 10353, 'epoch': 2} {'type': 'loss', 'content': 0.1006307527422905, 'timestamp': '2025-10-01 04:25:41.850952', 'step': 10354, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:41.894559', 'step': 10354, 'epoch': 2} {'type': 'loss', 'content': 0.09819624572992325, 'timestamp': '2025-10-01 04:25:41.897058', 'step': 10355, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-10-01 04:25:41.932008', 'step': 10355, 'epoch': 2} {'type': 'loss', 'content': 0.1771894246339798, 'timestamp': '2025-10-01 04:25:41.957507', 'step': 10356, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:41.995357', 'step': 10356, 'epoch': 2} {'type': 'loss', 'content': 0.05368967726826668, 'timestamp': '2025-10-01 04:25:41.997890', 'step': 10357, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:42.046174', 'step': 10357, 'epoch': 2} {'type': 'loss', 'content': 0.11793583631515503, 'timestamp': '2025-10-01 04:25:42.049171', 'step': 10358, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:42.083761', 'step': 10358, 'epoch': 2} {'type': 'loss', 'content': 0.19118307530879974, 'timestamp': '2025-10-01 04:25:42.085951', 'step': 10359, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:25:42.124371', 'step': 10359, 'epoch': 2} {'type': 'loss', 'content': 0.09966820478439331, 'timestamp': '2025-10-01 04:25:42.148205', 'step': 10360, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:42.200151', 'step': 10360, 'epoch': 2} {'type': 'loss', 'content': 0.13155093789100647, 'timestamp': '2025-10-01 04:25:42.202472', 'step': 10361, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:42.236302', 'step': 10361, 'epoch': 2} {'type': 'loss', 'content': 0.06615553796291351, 'timestamp': '2025-10-01 04:25:42.238441', 'step': 10362, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:42.276267', 'step': 10362, 'epoch': 2} {'type': 'loss', 'content': 0.13234052062034607, 'timestamp': '2025-10-01 04:25:42.278581', 'step': 10363, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:25:42.335450', 'step': 10363, 'epoch': 2} {'type': 'loss', 'content': 0.03715825453400612, 'timestamp': '2025-10-01 04:25:42.359129', 'step': 10364, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:25:42.397325', 'step': 10364, 'epoch': 2} {'type': 'loss', 'content': 0.09848736226558685, 'timestamp': '2025-10-01 04:25:42.399538', 'step': 10365, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:42.452390', 'step': 10365, 'epoch': 2} {'type': 'loss', 'content': 0.163368821144104, 'timestamp': '2025-10-01 04:25:42.454944', 'step': 10366, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:25:42.491512', 'step': 10366, 'epoch': 2} {'type': 'loss', 'content': 0.08108444511890411, 'timestamp': '2025-10-01 04:25:42.493834', 'step': 10367, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:25:42.529881', 'step': 10367, 'epoch': 2} {'type': 'loss', 'content': 0.11243845522403717, 'timestamp': '2025-10-01 04:25:42.553424', 'step': 10368, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:42.588618', 'step': 10368, 'epoch': 2} {'type': 'loss', 'content': 0.09024128317832947, 'timestamp': '2025-10-01 04:25:42.592142', 'step': 10369, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:25:42.630103', 'step': 10369, 'epoch': 2} {'type': 'loss', 'content': 0.07850806415081024, 'timestamp': '2025-10-01 04:25:42.632166', 'step': 10370, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:25:42.667381', 'step': 10370, 'epoch': 2} {'type': 'loss', 'content': 0.08874959498643875, 'timestamp': '2025-10-01 04:25:42.669639', 'step': 10371, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:42.729287', 'step': 10371, 'epoch': 2} {'type': 'loss', 'content': 0.1347607672214508, 'timestamp': '2025-10-01 04:25:42.753574', 'step': 10372, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:42.802566', 'step': 10372, 'epoch': 2} {'type': 'loss', 'content': 0.09587813913822174, 'timestamp': '2025-10-01 04:25:42.804769', 'step': 10373, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:42.855036', 'step': 10373, 'epoch': 2} {'type': 'loss', 'content': 0.10776729881763458, 'timestamp': '2025-10-01 04:25:42.857322', 'step': 10374, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:25:42.907193', 'step': 10374, 'epoch': 2} {'type': 'loss', 'content': 0.1283859759569168, 'timestamp': '2025-10-01 04:25:42.909524', 'step': 10375, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:42.944156', 'step': 10375, 'epoch': 2} {'type': 'loss', 'content': 0.112163245677948, 'timestamp': '2025-10-01 04:25:42.968494', 'step': 10376, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:25:43.006625', 'step': 10376, 'epoch': 2} {'type': 'loss', 'content': 0.058617230504751205, 'timestamp': '2025-10-01 04:25:43.008794', 'step': 10377, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:43.046525', 'step': 10377, 'epoch': 2} {'type': 'loss', 'content': 0.11228788644075394, 'timestamp': '2025-10-01 04:25:43.048762', 'step': 10378, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:43.113879', 'step': 10378, 'epoch': 2} {'type': 'loss', 'content': 0.13769151270389557, 'timestamp': '2025-10-01 04:25:43.116181', 'step': 10379, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:25:43.152612', 'step': 10379, 'epoch': 2} {'type': 'loss', 'content': 0.16429859399795532, 'timestamp': '2025-10-01 04:25:43.177418', 'step': 10380, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:43.226857', 'step': 10380, 'epoch': 2} {'type': 'loss', 'content': 0.17302866280078888, 'timestamp': '2025-10-01 04:25:43.228923', 'step': 10381, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:43.263157', 'step': 10381, 'epoch': 2} {'type': 'loss', 'content': 0.08161312341690063, 'timestamp': '2025-10-01 04:25:43.265482', 'step': 10382, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:25:43.299176', 'step': 10382, 'epoch': 2} {'type': 'loss', 'content': 0.08520226180553436, 'timestamp': '2025-10-01 04:25:43.301417', 'step': 10383, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:25:43.335463', 'step': 10383, 'epoch': 2} {'type': 'loss', 'content': 0.12250017374753952, 'timestamp': '2025-10-01 04:25:43.359173', 'step': 10384, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:25:43.394892', 'step': 10384, 'epoch': 2} {'type': 'loss', 'content': 0.16810114681720734, 'timestamp': '2025-10-01 04:25:43.397477', 'step': 10385, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:25:43.433535', 'step': 10385, 'epoch': 2} {'type': 'loss', 'content': 0.11931484937667847, 'timestamp': '2025-10-01 04:25:43.439636', 'step': 10386, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:43.473554', 'step': 10386, 'epoch': 2} {'type': 'loss', 'content': 0.0487932525575161, 'timestamp': '2025-10-01 04:25:43.475469', 'step': 10387, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:43.511671', 'step': 10387, 'epoch': 2} {'type': 'loss', 'content': 0.0643870085477829, 'timestamp': '2025-10-01 04:25:43.535430', 'step': 10388, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:43.572090', 'step': 10388, 'epoch': 2} {'type': 'loss', 'content': 0.15556374192237854, 'timestamp': '2025-10-01 04:25:43.575642', 'step': 10389, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:43.610375', 'step': 10389, 'epoch': 2} {'type': 'loss', 'content': 0.11035477370023727, 'timestamp': '2025-10-01 04:25:43.612605', 'step': 10390, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:43.648474', 'step': 10390, 'epoch': 2} {'type': 'loss', 'content': 0.1026981920003891, 'timestamp': '2025-10-01 04:25:43.652303', 'step': 10391, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:43.688468', 'step': 10391, 'epoch': 2} {'type': 'loss', 'content': 0.12188568711280823, 'timestamp': '2025-10-01 04:25:43.712486', 'step': 10392, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:43.748960', 'step': 10392, 'epoch': 2} {'type': 'loss', 'content': 0.14158321917057037, 'timestamp': '2025-10-01 04:25:43.752025', 'step': 10393, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:43.799563', 'step': 10393, 'epoch': 2} {'type': 'loss', 'content': 0.1774202287197113, 'timestamp': '2025-10-01 04:25:43.811891', 'step': 10394, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:43.848250', 'step': 10394, 'epoch': 2} {'type': 'loss', 'content': 0.08721236139535904, 'timestamp': '2025-10-01 04:25:43.850501', 'step': 10395, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-10-01 04:25:43.886473', 'step': 10395, 'epoch': 2} {'type': 'loss', 'content': 0.1598692089319229, 'timestamp': '2025-10-01 04:25:43.912205', 'step': 10396, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:25:43.959452', 'step': 10396, 'epoch': 2} {'type': 'loss', 'content': 0.1918923556804657, 'timestamp': '2025-10-01 04:25:43.961909', 'step': 10397, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:25:44.007494', 'step': 10397, 'epoch': 2} {'type': 'loss', 'content': 0.17750032246112823, 'timestamp': '2025-10-01 04:25:44.009802', 'step': 10398, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:44.061087', 'step': 10398, 'epoch': 2} {'type': 'loss', 'content': 0.12574279308319092, 'timestamp': '2025-10-01 04:25:44.063747', 'step': 10399, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:44.106458', 'step': 10399, 'epoch': 2} {'type': 'loss', 'content': 0.1378118246793747, 'timestamp': '2025-10-01 04:25:44.130234', 'step': 10400, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:44.165898', 'step': 10400, 'epoch': 2} {'type': 'loss', 'content': 0.10831417143344879, 'timestamp': '2025-10-01 04:25:44.168131', 'step': 10401, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:44.225398', 'step': 10401, 'epoch': 2} {'type': 'loss', 'content': 0.07939811795949936, 'timestamp': '2025-10-01 04:25:44.228183', 'step': 10402, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:44.264173', 'step': 10402, 'epoch': 2} {'type': 'loss', 'content': 0.09276976436376572, 'timestamp': '2025-10-01 04:25:44.266308', 'step': 10403, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:44.301417', 'step': 10403, 'epoch': 2} {'type': 'loss', 'content': 0.06858118623495102, 'timestamp': '2025-10-01 04:25:44.325371', 'step': 10404, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:44.362894', 'step': 10404, 'epoch': 2} {'type': 'loss', 'content': 0.11517682671546936, 'timestamp': '2025-10-01 04:25:44.365721', 'step': 10405, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:25:44.414895', 'step': 10405, 'epoch': 2} {'type': 'loss', 'content': 0.11931764334440231, 'timestamp': '2025-10-01 04:25:44.417075', 'step': 10406, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:25:44.450878', 'step': 10406, 'epoch': 2} {'type': 'loss', 'content': 0.09169077128171921, 'timestamp': '2025-10-01 04:25:44.459630', 'step': 10407, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:44.493771', 'step': 10407, 'epoch': 2} {'type': 'loss', 'content': 0.1020079255104065, 'timestamp': '2025-10-01 04:25:44.517603', 'step': 10408, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:44.565783', 'step': 10408, 'epoch': 2} {'type': 'loss', 'content': 0.06841765344142914, 'timestamp': '2025-10-01 04:25:44.570738', 'step': 10409, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:44.608287', 'step': 10409, 'epoch': 2} {'type': 'loss', 'content': 0.08075515180826187, 'timestamp': '2025-10-01 04:25:44.610384', 'step': 10410, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:44.647080', 'step': 10410, 'epoch': 2} {'type': 'loss', 'content': 0.12472671270370483, 'timestamp': '2025-10-01 04:25:44.658948', 'step': 10411, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:25:44.704296', 'step': 10411, 'epoch': 2} {'type': 'loss', 'content': 0.19940988719463348, 'timestamp': '2025-10-01 04:25:44.728184', 'step': 10412, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:44.776922', 'step': 10412, 'epoch': 2} {'type': 'loss', 'content': 0.11586464941501617, 'timestamp': '2025-10-01 04:25:44.779260', 'step': 10413, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:44.813323', 'step': 10413, 'epoch': 2} {'type': 'loss', 'content': 0.09125815331935883, 'timestamp': '2025-10-01 04:25:44.815612', 'step': 10414, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:25:44.850617', 'step': 10414, 'epoch': 2} {'type': 'loss', 'content': 0.09332039952278137, 'timestamp': '2025-10-01 04:25:44.854347', 'step': 10415, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:44.886756', 'step': 10415, 'epoch': 2} {'type': 'loss', 'content': 0.07027408480644226, 'timestamp': '2025-10-01 04:25:44.910651', 'step': 10416, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:44.956402', 'step': 10416, 'epoch': 2} {'type': 'loss', 'content': 0.12386259436607361, 'timestamp': '2025-10-01 04:25:44.958646', 'step': 10417, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:25:44.992526', 'step': 10417, 'epoch': 2} {'type': 'loss', 'content': 0.21010787785053253, 'timestamp': '2025-10-01 04:25:44.994919', 'step': 10418, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:45.030216', 'step': 10418, 'epoch': 2} {'type': 'loss', 'content': 0.12463126331567764, 'timestamp': '2025-10-01 04:25:45.033201', 'step': 10419, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:45.080871', 'step': 10419, 'epoch': 2} {'type': 'loss', 'content': 0.18126201629638672, 'timestamp': '2025-10-01 04:25:45.117071', 'step': 10420, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:45.150341', 'step': 10420, 'epoch': 2} {'type': 'loss', 'content': 0.1111481562256813, 'timestamp': '2025-10-01 04:25:45.152506', 'step': 10421, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:45.192887', 'step': 10421, 'epoch': 2} {'type': 'loss', 'content': 0.13572511076927185, 'timestamp': '2025-10-01 04:25:45.196641', 'step': 10422, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:25:45.238101', 'step': 10422, 'epoch': 2} {'type': 'loss', 'content': 0.1443185955286026, 'timestamp': '2025-10-01 04:25:45.240276', 'step': 10423, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:45.275278', 'step': 10423, 'epoch': 2} {'type': 'loss', 'content': 0.12452337890863419, 'timestamp': '2025-10-01 04:25:45.299673', 'step': 10424, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:45.333714', 'step': 10424, 'epoch': 2} {'type': 'loss', 'content': 0.15543201565742493, 'timestamp': '2025-10-01 04:25:45.337867', 'step': 10425, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:25:45.380119', 'step': 10425, 'epoch': 2} {'type': 'loss', 'content': 0.15535786747932434, 'timestamp': '2025-10-01 04:25:45.382283', 'step': 10426, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:45.415647', 'step': 10426, 'epoch': 2} {'type': 'loss', 'content': 0.11055929958820343, 'timestamp': '2025-10-01 04:25:45.418495', 'step': 10427, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:45.452562', 'step': 10427, 'epoch': 2} {'type': 'loss', 'content': 0.06825340539216995, 'timestamp': '2025-10-01 04:25:45.476918', 'step': 10428, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:45.524449', 'step': 10428, 'epoch': 2} {'type': 'loss', 'content': 0.10252279788255692, 'timestamp': '2025-10-01 04:25:45.526627', 'step': 10429, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:45.570804', 'step': 10429, 'epoch': 2} {'type': 'loss', 'content': 0.10744943469762802, 'timestamp': '2025-10-01 04:25:45.582082', 'step': 10430, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:45.623082', 'step': 10430, 'epoch': 2} {'type': 'loss', 'content': 0.16479350626468658, 'timestamp': '2025-10-01 04:25:45.637272', 'step': 10431, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:45.678127', 'step': 10431, 'epoch': 2} {'type': 'loss', 'content': 0.09614261239767075, 'timestamp': '2025-10-01 04:25:45.705684', 'step': 10432, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:25:45.758110', 'step': 10432, 'epoch': 2} {'type': 'loss', 'content': 0.15174874663352966, 'timestamp': '2025-10-01 04:25:45.760358', 'step': 10433, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:25:45.807214', 'step': 10433, 'epoch': 2} {'type': 'loss', 'content': 0.0846153199672699, 'timestamp': '2025-10-01 04:25:45.810112', 'step': 10434, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:45.845079', 'step': 10434, 'epoch': 2} {'type': 'loss', 'content': 0.10492076724767685, 'timestamp': '2025-10-01 04:25:45.848054', 'step': 10435, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:25:45.884233', 'step': 10435, 'epoch': 2} {'type': 'loss', 'content': 0.057333480566740036, 'timestamp': '2025-10-01 04:25:45.907923', 'step': 10436, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-10-01 04:25:45.943955', 'step': 10436, 'epoch': 2} {'type': 'loss', 'content': 0.14107610285282135, 'timestamp': '2025-10-01 04:25:45.948697', 'step': 10437, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:45.982856', 'step': 10437, 'epoch': 2} {'type': 'loss', 'content': 0.0753387063741684, 'timestamp': '2025-10-01 04:25:45.984993', 'step': 10438, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:46.020452', 'step': 10438, 'epoch': 2} {'type': 'loss', 'content': 0.08695323765277863, 'timestamp': '2025-10-01 04:25:46.023182', 'step': 10439, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:25:46.057049', 'step': 10439, 'epoch': 2} {'type': 'loss', 'content': 0.16683219373226166, 'timestamp': '2025-10-01 04:25:46.080876', 'step': 10440, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:25:46.118881', 'step': 10440, 'epoch': 2} {'type': 'loss', 'content': 0.06614068895578384, 'timestamp': '2025-10-01 04:25:46.121450', 'step': 10441, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:25:46.160862', 'step': 10441, 'epoch': 2} {'type': 'loss', 'content': 0.1004168689250946, 'timestamp': '2025-10-01 04:25:46.163000', 'step': 10442, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:46.196811', 'step': 10442, 'epoch': 2} {'type': 'loss', 'content': 0.07528576999902725, 'timestamp': '2025-10-01 04:25:46.211951', 'step': 10443, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:25:46.253424', 'step': 10443, 'epoch': 2} {'type': 'loss', 'content': 0.11124031245708466, 'timestamp': '2025-10-01 04:25:46.277221', 'step': 10444, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:46.315339', 'step': 10444, 'epoch': 2} {'type': 'loss', 'content': 0.08363020420074463, 'timestamp': '2025-10-01 04:25:46.317959', 'step': 10445, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:25:46.356145', 'step': 10445, 'epoch': 2} {'type': 'loss', 'content': 0.08656871318817139, 'timestamp': '2025-10-01 04:25:46.358912', 'step': 10446, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:25:46.394335', 'step': 10446, 'epoch': 2} {'type': 'loss', 'content': 0.05768240988254547, 'timestamp': '2025-10-01 04:25:46.396779', 'step': 10447, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:25:46.455701', 'step': 10447, 'epoch': 2} {'type': 'loss', 'content': 0.13183529675006866, 'timestamp': '2025-10-01 04:25:46.490688', 'step': 10448, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:46.526686', 'step': 10448, 'epoch': 2} {'type': 'loss', 'content': 0.1745007187128067, 'timestamp': '2025-10-01 04:25:46.528922', 'step': 10449, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:25:46.562201', 'step': 10449, 'epoch': 2} {'type': 'loss', 'content': 0.10828829556703568, 'timestamp': '2025-10-01 04:25:46.573960', 'step': 10450, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:25:46.621336', 'step': 10450, 'epoch': 2} {'type': 'loss', 'content': 0.10208118706941605, 'timestamp': '2025-10-01 04:25:46.623603', 'step': 10451, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:46.658874', 'step': 10451, 'epoch': 2} {'type': 'loss', 'content': 0.17298179864883423, 'timestamp': '2025-10-01 04:25:46.693180', 'step': 10452, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:46.725552', 'step': 10452, 'epoch': 2} {'type': 'loss', 'content': 0.16428029537200928, 'timestamp': '2025-10-01 04:25:46.727809', 'step': 10453, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:46.762230', 'step': 10453, 'epoch': 2} {'type': 'loss', 'content': 0.13701286911964417, 'timestamp': '2025-10-01 04:25:46.764912', 'step': 10454, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:46.798883', 'step': 10454, 'epoch': 2} {'type': 'loss', 'content': 0.1573900729417801, 'timestamp': '2025-10-01 04:25:46.801346', 'step': 10455, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:46.840282', 'step': 10455, 'epoch': 2} {'type': 'loss', 'content': 0.0916275605559349, 'timestamp': '2025-10-01 04:25:46.863893', 'step': 10456, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:46.897362', 'step': 10456, 'epoch': 2} {'type': 'loss', 'content': 0.08981554955244064, 'timestamp': '2025-10-01 04:25:46.899816', 'step': 10457, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:25:46.946051', 'step': 10457, 'epoch': 2} {'type': 'loss', 'content': 0.09812093526124954, 'timestamp': '2025-10-01 04:25:46.948499', 'step': 10458, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:25:46.995495', 'step': 10458, 'epoch': 2} {'type': 'loss', 'content': 0.10254905372858047, 'timestamp': '2025-10-01 04:25:46.999767', 'step': 10459, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:47.033002', 'step': 10459, 'epoch': 2} {'type': 'loss', 'content': 0.09703341871500015, 'timestamp': '2025-10-01 04:25:47.056941', 'step': 10460, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:47.101327', 'step': 10460, 'epoch': 2} {'type': 'loss', 'content': 0.06792430579662323, 'timestamp': '2025-10-01 04:25:47.104602', 'step': 10461, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:47.148777', 'step': 10461, 'epoch': 2} {'type': 'loss', 'content': 0.13324877619743347, 'timestamp': '2025-10-01 04:25:47.152243', 'step': 10462, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:47.188092', 'step': 10462, 'epoch': 2} {'type': 'loss', 'content': 0.1428043246269226, 'timestamp': '2025-10-01 04:25:47.190657', 'step': 10463, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:47.229155', 'step': 10463, 'epoch': 2} {'type': 'loss', 'content': 0.06371384859085083, 'timestamp': '2025-10-01 04:25:47.253026', 'step': 10464, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:47.289000', 'step': 10464, 'epoch': 2} {'type': 'loss', 'content': 0.033537205308675766, 'timestamp': '2025-10-01 04:25:47.291387', 'step': 10465, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:47.338539', 'step': 10465, 'epoch': 2} {'type': 'loss', 'content': 0.1458452343940735, 'timestamp': '2025-10-01 04:25:47.341082', 'step': 10466, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:25:47.383447', 'step': 10466, 'epoch': 2} {'type': 'loss', 'content': 0.1269877851009369, 'timestamp': '2025-10-01 04:25:47.385955', 'step': 10467, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:47.418996', 'step': 10467, 'epoch': 2} {'type': 'loss', 'content': 0.036589156836271286, 'timestamp': '2025-10-01 04:25:47.442600', 'step': 10468, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:47.486269', 'step': 10468, 'epoch': 2} {'type': 'loss', 'content': 0.11018920689821243, 'timestamp': '2025-10-01 04:25:47.488486', 'step': 10469, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:25:47.521777', 'step': 10469, 'epoch': 2} {'type': 'loss', 'content': 0.2057676464319229, 'timestamp': '2025-10-01 04:25:47.524745', 'step': 10470, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:25:47.572286', 'step': 10470, 'epoch': 2} {'type': 'loss', 'content': 0.0722053125500679, 'timestamp': '2025-10-01 04:25:47.574973', 'step': 10471, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:47.608400', 'step': 10471, 'epoch': 2} {'type': 'loss', 'content': 0.03370736539363861, 'timestamp': '2025-10-01 04:25:47.632110', 'step': 10472, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:47.665583', 'step': 10472, 'epoch': 2} {'type': 'loss', 'content': 0.10720232874155045, 'timestamp': '2025-10-01 04:25:47.667932', 'step': 10473, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:47.703053', 'step': 10473, 'epoch': 2} {'type': 'loss', 'content': 0.05522295832633972, 'timestamp': '2025-10-01 04:25:47.705597', 'step': 10474, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:47.738467', 'step': 10474, 'epoch': 2} {'type': 'loss', 'content': 0.04268138110637665, 'timestamp': '2025-10-01 04:25:47.740663', 'step': 10475, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:47.774776', 'step': 10475, 'epoch': 2} {'type': 'loss', 'content': 0.14441420137882233, 'timestamp': '2025-10-01 04:25:47.799062', 'step': 10476, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:25:47.833638', 'step': 10476, 'epoch': 2} {'type': 'loss', 'content': 0.11176619678735733, 'timestamp': '2025-10-01 04:25:47.836076', 'step': 10477, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:25:47.871198', 'step': 10477, 'epoch': 2} {'type': 'loss', 'content': 0.16882194578647614, 'timestamp': '2025-10-01 04:25:47.873738', 'step': 10478, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:47.908888', 'step': 10478, 'epoch': 2} {'type': 'loss', 'content': 0.04308389872312546, 'timestamp': '2025-10-01 04:25:47.911436', 'step': 10479, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:25:47.952784', 'step': 10479, 'epoch': 2} {'type': 'loss', 'content': 0.13814681768417358, 'timestamp': '2025-10-01 04:25:47.976248', 'step': 10480, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:48.008831', 'step': 10480, 'epoch': 2} {'type': 'loss', 'content': 0.1080179214477539, 'timestamp': '2025-10-01 04:25:48.011001', 'step': 10481, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:25:48.052841', 'step': 10481, 'epoch': 2} {'type': 'loss', 'content': 0.13205888867378235, 'timestamp': '2025-10-01 04:25:48.055175', 'step': 10482, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:48.091677', 'step': 10482, 'epoch': 2} {'type': 'loss', 'content': 0.1294575333595276, 'timestamp': '2025-10-01 04:25:48.104408', 'step': 10483, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:48.139540', 'step': 10483, 'epoch': 2} {'type': 'loss', 'content': 0.07857351005077362, 'timestamp': '2025-10-01 04:25:48.163390', 'step': 10484, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:25:48.197166', 'step': 10484, 'epoch': 2} {'type': 'loss', 'content': 0.12199334800243378, 'timestamp': '2025-10-01 04:25:48.199319', 'step': 10485, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:48.241645', 'step': 10485, 'epoch': 2} {'type': 'loss', 'content': 0.029712587594985962, 'timestamp': '2025-10-01 04:25:48.243814', 'step': 10486, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:25:48.277205', 'step': 10486, 'epoch': 2} {'type': 'loss', 'content': 0.23208484053611755, 'timestamp': '2025-10-01 04:25:48.279397', 'step': 10487, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:25:48.313530', 'step': 10487, 'epoch': 2} {'type': 'loss', 'content': 0.14894889295101166, 'timestamp': '2025-10-01 04:25:48.337405', 'step': 10488, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:48.374919', 'step': 10488, 'epoch': 2} {'type': 'loss', 'content': 0.06054619699716568, 'timestamp': '2025-10-01 04:25:48.377088', 'step': 10489, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:48.427139', 'step': 10489, 'epoch': 2} {'type': 'loss', 'content': 0.13825781643390656, 'timestamp': '2025-10-01 04:25:48.429916', 'step': 10490, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:48.465451', 'step': 10490, 'epoch': 2} {'type': 'loss', 'content': 0.14346209168434143, 'timestamp': '2025-10-01 04:25:48.467766', 'step': 10491, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:48.519833', 'step': 10491, 'epoch': 2} {'type': 'loss', 'content': 0.09661015123128891, 'timestamp': '2025-10-01 04:25:48.543507', 'step': 10492, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:25:48.593851', 'step': 10492, 'epoch': 2} {'type': 'loss', 'content': 0.1177566647529602, 'timestamp': '2025-10-01 04:25:48.597068', 'step': 10493, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:48.636692', 'step': 10493, 'epoch': 2} {'type': 'loss', 'content': 0.11114343255758286, 'timestamp': '2025-10-01 04:25:48.638937', 'step': 10494, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:25:48.706860', 'step': 10494, 'epoch': 2} {'type': 'loss', 'content': 0.10116720199584961, 'timestamp': '2025-10-01 04:25:48.709173', 'step': 10495, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:48.747716', 'step': 10495, 'epoch': 2} {'type': 'loss', 'content': 0.16623958945274353, 'timestamp': '2025-10-01 04:25:48.771495', 'step': 10496, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:48.806735', 'step': 10496, 'epoch': 2} {'type': 'loss', 'content': 0.07542303949594498, 'timestamp': '2025-10-01 04:25:48.808978', 'step': 10497, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:48.859321', 'step': 10497, 'epoch': 2} {'type': 'loss', 'content': 0.1715908646583557, 'timestamp': '2025-10-01 04:25:48.863222', 'step': 10498, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:48.901333', 'step': 10498, 'epoch': 2} {'type': 'loss', 'content': 0.06620007008314133, 'timestamp': '2025-10-01 04:25:48.904330', 'step': 10499, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:25:48.939156', 'step': 10499, 'epoch': 2} {'type': 'loss', 'content': 0.1710783690214157, 'timestamp': '2025-10-01 04:25:48.962773', 'step': 10500, 'epoch': 2} {'type': 'info', 'content': 'Checkpoint saved at step 10500', 'timestamp': '2025-10-01 04:25:53.997623', 'step': 10500, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:25:54.032288', 'step': 10500, 'epoch': 2} {'type': 'loss', 'content': 0.08538217842578888, 'timestamp': '2025-10-01 04:25:54.035320', 'step': 10501, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:54.081978', 'step': 10501, 'epoch': 2} {'type': 'loss', 'content': 0.09077051281929016, 'timestamp': '2025-10-01 04:25:54.095986', 'step': 10502, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:54.154051', 'step': 10502, 'epoch': 2} {'type': 'loss', 'content': 0.13990111649036407, 'timestamp': '2025-10-01 04:25:54.168165', 'step': 10503, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:54.202736', 'step': 10503, 'epoch': 2} {'type': 'loss', 'content': 0.1572740375995636, 'timestamp': '2025-10-01 04:25:54.226650', 'step': 10504, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:25:54.272169', 'step': 10504, 'epoch': 2} {'type': 'loss', 'content': 0.24262186884880066, 'timestamp': '2025-10-01 04:25:54.275096', 'step': 10505, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:54.311397', 'step': 10505, 'epoch': 2} {'type': 'loss', 'content': 0.07158598303794861, 'timestamp': '2025-10-01 04:25:54.314947', 'step': 10506, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:25:54.355453', 'step': 10506, 'epoch': 2} {'type': 'loss', 'content': 0.1814102828502655, 'timestamp': '2025-10-01 04:25:54.357777', 'step': 10507, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:25:54.393268', 'step': 10507, 'epoch': 2} {'type': 'loss', 'content': 0.06429266184568405, 'timestamp': '2025-10-01 04:25:54.417381', 'step': 10508, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:25:54.461237', 'step': 10508, 'epoch': 2} {'type': 'loss', 'content': 0.09314548969268799, 'timestamp': '2025-10-01 04:25:54.463774', 'step': 10509, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:54.502528', 'step': 10509, 'epoch': 2} {'type': 'loss', 'content': 0.05129404738545418, 'timestamp': '2025-10-01 04:25:54.506186', 'step': 10510, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:54.549517', 'step': 10510, 'epoch': 2} {'type': 'loss', 'content': 0.1465129256248474, 'timestamp': '2025-10-01 04:25:54.551872', 'step': 10511, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:54.593443', 'step': 10511, 'epoch': 2} {'type': 'loss', 'content': 0.10596887022256851, 'timestamp': '2025-10-01 04:25:54.617292', 'step': 10512, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:54.657519', 'step': 10512, 'epoch': 2} {'type': 'loss', 'content': 0.12503090500831604, 'timestamp': '2025-10-01 04:25:54.660038', 'step': 10513, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:25:54.697001', 'step': 10513, 'epoch': 2} {'type': 'loss', 'content': 0.103627048432827, 'timestamp': '2025-10-01 04:25:54.699294', 'step': 10514, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:54.740344', 'step': 10514, 'epoch': 2} {'type': 'loss', 'content': 0.0669582411646843, 'timestamp': '2025-10-01 04:25:54.742771', 'step': 10515, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:54.789697', 'step': 10515, 'epoch': 2} {'type': 'loss', 'content': 0.12809258699417114, 'timestamp': '2025-10-01 04:25:54.813562', 'step': 10516, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:54.849656', 'step': 10516, 'epoch': 2} {'type': 'loss', 'content': 0.0563754104077816, 'timestamp': '2025-10-01 04:25:54.864637', 'step': 10517, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:54.910406', 'step': 10517, 'epoch': 2} {'type': 'loss', 'content': 0.1817914843559265, 'timestamp': '2025-10-01 04:25:54.912791', 'step': 10518, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:25:54.957372', 'step': 10518, 'epoch': 2} {'type': 'loss', 'content': 0.09323174506425858, 'timestamp': '2025-10-01 04:25:54.960103', 'step': 10519, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:55.002424', 'step': 10519, 'epoch': 2} {'type': 'loss', 'content': 0.1760788857936859, 'timestamp': '2025-10-01 04:25:55.026516', 'step': 10520, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:25:55.060214', 'step': 10520, 'epoch': 2} {'type': 'loss', 'content': 0.014722124673426151, 'timestamp': '2025-10-01 04:25:55.064747', 'step': 10521, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:55.107283', 'step': 10521, 'epoch': 2} {'type': 'loss', 'content': 0.06953061372041702, 'timestamp': '2025-10-01 04:25:55.109688', 'step': 10522, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:55.142824', 'step': 10522, 'epoch': 2} {'type': 'loss', 'content': 0.22223377227783203, 'timestamp': '2025-10-01 04:25:55.145128', 'step': 10523, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:55.177552', 'step': 10523, 'epoch': 2} {'type': 'loss', 'content': 0.12151382863521576, 'timestamp': '2025-10-01 04:25:55.202054', 'step': 10524, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:55.245256', 'step': 10524, 'epoch': 2} {'type': 'loss', 'content': 0.0934227705001831, 'timestamp': '2025-10-01 04:25:55.247745', 'step': 10525, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:55.290750', 'step': 10525, 'epoch': 2} {'type': 'loss', 'content': 0.0627053901553154, 'timestamp': '2025-10-01 04:25:55.293216', 'step': 10526, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:25:55.339681', 'step': 10526, 'epoch': 2} {'type': 'loss', 'content': 0.07284121960401535, 'timestamp': '2025-10-01 04:25:55.342375', 'step': 10527, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:55.378284', 'step': 10527, 'epoch': 2} {'type': 'loss', 'content': 0.075425885617733, 'timestamp': '2025-10-01 04:25:55.403046', 'step': 10528, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:55.448392', 'step': 10528, 'epoch': 2} {'type': 'loss', 'content': 0.18978816270828247, 'timestamp': '2025-10-01 04:25:55.450840', 'step': 10529, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:55.488708', 'step': 10529, 'epoch': 2} {'type': 'loss', 'content': 0.1375422328710556, 'timestamp': '2025-10-01 04:25:55.491084', 'step': 10530, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:55.530826', 'step': 10530, 'epoch': 2} {'type': 'loss', 'content': 0.1457233726978302, 'timestamp': '2025-10-01 04:25:55.533157', 'step': 10531, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:55.567998', 'step': 10531, 'epoch': 2} {'type': 'loss', 'content': 0.12868693470954895, 'timestamp': '2025-10-01 04:25:55.592433', 'step': 10532, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:55.626676', 'step': 10532, 'epoch': 2} {'type': 'loss', 'content': 0.1516779512166977, 'timestamp': '2025-10-01 04:25:55.629514', 'step': 10533, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:55.674687', 'step': 10533, 'epoch': 2} {'type': 'loss', 'content': 0.07741173356771469, 'timestamp': '2025-10-01 04:25:55.677174', 'step': 10534, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:25:55.712078', 'step': 10534, 'epoch': 2} {'type': 'loss', 'content': 0.06278964132070541, 'timestamp': '2025-10-01 04:25:55.715097', 'step': 10535, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:55.748010', 'step': 10535, 'epoch': 2} {'type': 'loss', 'content': 0.05217147246003151, 'timestamp': '2025-10-01 04:25:55.772121', 'step': 10536, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:25:55.813396', 'step': 10536, 'epoch': 2} {'type': 'loss', 'content': 0.1221102699637413, 'timestamp': '2025-10-01 04:25:55.815568', 'step': 10537, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:55.853791', 'step': 10537, 'epoch': 2} {'type': 'loss', 'content': 0.07335788011550903, 'timestamp': '2025-10-01 04:25:55.856139', 'step': 10538, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:55.896250', 'step': 10538, 'epoch': 2} {'type': 'loss', 'content': 0.10415736585855484, 'timestamp': '2025-10-01 04:25:55.898504', 'step': 10539, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:55.944508', 'step': 10539, 'epoch': 2} {'type': 'loss', 'content': 0.24125227332115173, 'timestamp': '2025-10-01 04:25:55.968379', 'step': 10540, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:56.017061', 'step': 10540, 'epoch': 2} {'type': 'loss', 'content': 0.17828580737113953, 'timestamp': '2025-10-01 04:25:56.019534', 'step': 10541, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:56.058230', 'step': 10541, 'epoch': 2} {'type': 'loss', 'content': 0.09071652591228485, 'timestamp': '2025-10-01 04:25:56.060184', 'step': 10542, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:56.092451', 'step': 10542, 'epoch': 2} {'type': 'loss', 'content': 0.1809811294078827, 'timestamp': '2025-10-01 04:25:56.094863', 'step': 10543, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:56.134606', 'step': 10543, 'epoch': 2} {'type': 'loss', 'content': 0.0799633339047432, 'timestamp': '2025-10-01 04:25:56.163753', 'step': 10544, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:25:56.197762', 'step': 10544, 'epoch': 2} {'type': 'loss', 'content': 0.05985574796795845, 'timestamp': '2025-10-01 04:25:56.199888', 'step': 10545, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:25:56.231786', 'step': 10545, 'epoch': 2} {'type': 'loss', 'content': 0.11315751820802689, 'timestamp': '2025-10-01 04:25:56.234179', 'step': 10546, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:56.274649', 'step': 10546, 'epoch': 2} {'type': 'loss', 'content': 0.09530442953109741, 'timestamp': '2025-10-01 04:25:56.276861', 'step': 10547, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:25:56.317797', 'step': 10547, 'epoch': 2} {'type': 'loss', 'content': 0.13832204043865204, 'timestamp': '2025-10-01 04:25:56.341751', 'step': 10548, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:56.389142', 'step': 10548, 'epoch': 2} {'type': 'loss', 'content': 0.1764102727174759, 'timestamp': '2025-10-01 04:25:56.391759', 'step': 10549, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:25:56.426220', 'step': 10549, 'epoch': 2} {'type': 'loss', 'content': 0.047452062368392944, 'timestamp': '2025-10-01 04:25:56.428354', 'step': 10550, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:56.465931', 'step': 10550, 'epoch': 2} {'type': 'loss', 'content': 0.17063167691230774, 'timestamp': '2025-10-01 04:25:56.468965', 'step': 10551, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:25:56.509318', 'step': 10551, 'epoch': 2} {'type': 'loss', 'content': 0.13761980831623077, 'timestamp': '2025-10-01 04:25:56.533849', 'step': 10552, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:56.569255', 'step': 10552, 'epoch': 2} {'type': 'loss', 'content': 0.13354694843292236, 'timestamp': '2025-10-01 04:25:56.571543', 'step': 10553, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:56.618253', 'step': 10553, 'epoch': 2} {'type': 'loss', 'content': 0.09129191190004349, 'timestamp': '2025-10-01 04:25:56.621530', 'step': 10554, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:56.661796', 'step': 10554, 'epoch': 2} {'type': 'loss', 'content': 0.11197849363088608, 'timestamp': '2025-10-01 04:25:56.663939', 'step': 10555, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:56.703796', 'step': 10555, 'epoch': 2} {'type': 'loss', 'content': 0.11506876349449158, 'timestamp': '2025-10-01 04:25:56.727129', 'step': 10556, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:56.759087', 'step': 10556, 'epoch': 2} {'type': 'loss', 'content': 0.08000510185956955, 'timestamp': '2025-10-01 04:25:56.761017', 'step': 10557, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:56.793676', 'step': 10557, 'epoch': 2} {'type': 'loss', 'content': 0.08936065435409546, 'timestamp': '2025-10-01 04:25:56.797092', 'step': 10558, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:56.829278', 'step': 10558, 'epoch': 2} {'type': 'loss', 'content': 0.1219630241394043, 'timestamp': '2025-10-01 04:25:56.831522', 'step': 10559, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:25:56.865943', 'step': 10559, 'epoch': 2} {'type': 'loss', 'content': 0.10482517629861832, 'timestamp': '2025-10-01 04:25:56.889469', 'step': 10560, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:25:56.926941', 'step': 10560, 'epoch': 2} {'type': 'loss', 'content': 0.14836485683918, 'timestamp': '2025-10-01 04:25:56.928785', 'step': 10561, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:56.961783', 'step': 10561, 'epoch': 2} {'type': 'loss', 'content': 0.06308965384960175, 'timestamp': '2025-10-01 04:25:56.964741', 'step': 10562, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:56.998465', 'step': 10562, 'epoch': 2} {'type': 'loss', 'content': 0.14600981771945953, 'timestamp': '2025-10-01 04:25:57.010838', 'step': 10563, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:57.044805', 'step': 10563, 'epoch': 2} {'type': 'loss', 'content': 0.08894751965999603, 'timestamp': '2025-10-01 04:25:57.068416', 'step': 10564, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:25:57.106055', 'step': 10564, 'epoch': 2} {'type': 'loss', 'content': 0.09897574782371521, 'timestamp': '2025-10-01 04:25:57.108233', 'step': 10565, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:25:57.147903', 'step': 10565, 'epoch': 2} {'type': 'loss', 'content': 0.13422217965126038, 'timestamp': '2025-10-01 04:25:57.150403', 'step': 10566, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:25:57.182615', 'step': 10566, 'epoch': 2} {'type': 'loss', 'content': 0.08718479424715042, 'timestamp': '2025-10-01 04:25:57.185099', 'step': 10567, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:57.222055', 'step': 10567, 'epoch': 2} {'type': 'loss', 'content': 0.08438407629728317, 'timestamp': '2025-10-01 04:25:57.245570', 'step': 10568, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:57.276420', 'step': 10568, 'epoch': 2} {'type': 'loss', 'content': 0.06550417095422745, 'timestamp': '2025-10-01 04:25:57.279014', 'step': 10569, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:57.311274', 'step': 10569, 'epoch': 2} {'type': 'loss', 'content': 0.08278948813676834, 'timestamp': '2025-10-01 04:25:57.313509', 'step': 10570, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:57.351093', 'step': 10570, 'epoch': 2} {'type': 'loss', 'content': 0.07164840400218964, 'timestamp': '2025-10-01 04:25:57.353490', 'step': 10571, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:57.384889', 'step': 10571, 'epoch': 2} {'type': 'loss', 'content': 0.14286047220230103, 'timestamp': '2025-10-01 04:25:57.411357', 'step': 10572, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:57.449526', 'step': 10572, 'epoch': 2} {'type': 'loss', 'content': 0.1086154356598854, 'timestamp': '2025-10-01 04:25:57.465918', 'step': 10573, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:25:57.502563', 'step': 10573, 'epoch': 2} {'type': 'loss', 'content': 0.11988527327775955, 'timestamp': '2025-10-01 04:25:57.505757', 'step': 10574, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:25:57.541976', 'step': 10574, 'epoch': 2} {'type': 'loss', 'content': 0.10785769671201706, 'timestamp': '2025-10-01 04:25:57.552727', 'step': 10575, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:25:57.584879', 'step': 10575, 'epoch': 2} {'type': 'loss', 'content': 0.20400401949882507, 'timestamp': '2025-10-01 04:25:57.609884', 'step': 10576, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:25:57.646620', 'step': 10576, 'epoch': 2} {'type': 'loss', 'content': 0.1436169147491455, 'timestamp': '2025-10-01 04:25:57.648666', 'step': 10577, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:57.684546', 'step': 10577, 'epoch': 2} {'type': 'loss', 'content': 0.052085019648075104, 'timestamp': '2025-10-01 04:25:57.688511', 'step': 10578, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:25:57.722303', 'step': 10578, 'epoch': 2} {'type': 'loss', 'content': 0.10982824116945267, 'timestamp': '2025-10-01 04:25:57.724683', 'step': 10579, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:57.766445', 'step': 10579, 'epoch': 2} {'type': 'loss', 'content': 0.011305340565741062, 'timestamp': '2025-10-01 04:25:57.790235', 'step': 10580, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:57.825420', 'step': 10580, 'epoch': 2} {'type': 'loss', 'content': 0.08036958426237106, 'timestamp': '2025-10-01 04:25:57.827529', 'step': 10581, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:25:57.861172', 'step': 10581, 'epoch': 2} {'type': 'loss', 'content': 0.11840243637561798, 'timestamp': '2025-10-01 04:25:57.873360', 'step': 10582, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:25:57.914454', 'step': 10582, 'epoch': 2} {'type': 'loss', 'content': 0.05861115828156471, 'timestamp': '2025-10-01 04:25:57.920667', 'step': 10583, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:25:57.953744', 'step': 10583, 'epoch': 2} {'type': 'loss', 'content': 0.07388104498386383, 'timestamp': '2025-10-01 04:25:57.977727', 'step': 10584, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:58.012958', 'step': 10584, 'epoch': 2} {'type': 'loss', 'content': 0.10450234264135361, 'timestamp': '2025-10-01 04:25:58.017366', 'step': 10585, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:58.056614', 'step': 10585, 'epoch': 2} {'type': 'loss', 'content': 0.1228809505701065, 'timestamp': '2025-10-01 04:25:58.059519', 'step': 10586, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:25:58.092022', 'step': 10586, 'epoch': 2} {'type': 'loss', 'content': 0.09794917702674866, 'timestamp': '2025-10-01 04:25:58.094904', 'step': 10587, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:25:58.134347', 'step': 10587, 'epoch': 2} {'type': 'loss', 'content': 0.06943254917860031, 'timestamp': '2025-10-01 04:25:58.159837', 'step': 10588, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:25:58.200574', 'step': 10588, 'epoch': 2} {'type': 'loss', 'content': 0.09788358211517334, 'timestamp': '2025-10-01 04:25:58.203274', 'step': 10589, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:25:58.244214', 'step': 10589, 'epoch': 2} {'type': 'loss', 'content': 0.10186321288347244, 'timestamp': '2025-10-01 04:25:58.246514', 'step': 10590, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:25:58.279835', 'step': 10590, 'epoch': 2} {'type': 'loss', 'content': 0.12190796434879303, 'timestamp': '2025-10-01 04:25:58.281957', 'step': 10591, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:58.322619', 'step': 10591, 'epoch': 2} {'type': 'loss', 'content': 0.10802005976438522, 'timestamp': '2025-10-01 04:25:58.346383', 'step': 10592, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:25:58.378933', 'step': 10592, 'epoch': 2} {'type': 'loss', 'content': 0.17338013648986816, 'timestamp': '2025-10-01 04:25:58.381964', 'step': 10593, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:58.416915', 'step': 10593, 'epoch': 2} {'type': 'loss', 'content': 0.08542413264513016, 'timestamp': '2025-10-01 04:25:58.419382', 'step': 10594, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:25:58.452042', 'step': 10594, 'epoch': 2} {'type': 'loss', 'content': 0.12716995179653168, 'timestamp': '2025-10-01 04:25:58.454157', 'step': 10595, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:25:58.485985', 'step': 10595, 'epoch': 2} {'type': 'loss', 'content': 0.1434471756219864, 'timestamp': '2025-10-01 04:25:58.509661', 'step': 10596, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:25:58.549588', 'step': 10596, 'epoch': 2} {'type': 'loss', 'content': 0.08915337175130844, 'timestamp': '2025-10-01 04:25:58.551780', 'step': 10597, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:58.583382', 'step': 10597, 'epoch': 2} {'type': 'loss', 'content': 0.09044764190912247, 'timestamp': '2025-10-01 04:25:58.588792', 'step': 10598, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:25:58.622918', 'step': 10598, 'epoch': 2} {'type': 'loss', 'content': 0.07363810390233994, 'timestamp': '2025-10-01 04:25:58.624837', 'step': 10599, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:58.656256', 'step': 10599, 'epoch': 2} {'type': 'loss', 'content': 0.17116080224514008, 'timestamp': '2025-10-01 04:25:58.680989', 'step': 10600, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-10-01 04:25:58.719621', 'step': 10600, 'epoch': 2} {'type': 'loss', 'content': 0.162806436419487, 'timestamp': '2025-10-01 04:25:58.722089', 'step': 10601, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:58.755499', 'step': 10601, 'epoch': 2} {'type': 'loss', 'content': 0.060721926391124725, 'timestamp': '2025-10-01 04:25:58.771368', 'step': 10602, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:25:58.803311', 'step': 10602, 'epoch': 2} {'type': 'loss', 'content': 0.15431825816631317, 'timestamp': '2025-10-01 04:25:58.805855', 'step': 10603, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:58.844242', 'step': 10603, 'epoch': 2} {'type': 'loss', 'content': 0.09602347761392593, 'timestamp': '2025-10-01 04:25:58.868272', 'step': 10604, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:25:58.903911', 'step': 10604, 'epoch': 2} {'type': 'loss', 'content': 0.13565975427627563, 'timestamp': '2025-10-01 04:25:58.906097', 'step': 10605, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:58.959629', 'step': 10605, 'epoch': 2} {'type': 'loss', 'content': 0.1706770956516266, 'timestamp': '2025-10-01 04:25:58.961902', 'step': 10606, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:59.001658', 'step': 10606, 'epoch': 2} {'type': 'loss', 'content': 0.09682261198759079, 'timestamp': '2025-10-01 04:25:59.003768', 'step': 10607, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:59.036214', 'step': 10607, 'epoch': 2} {'type': 'loss', 'content': 0.1516072303056717, 'timestamp': '2025-10-01 04:25:59.061471', 'step': 10608, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:59.096627', 'step': 10608, 'epoch': 2} {'type': 'loss', 'content': 0.13430634140968323, 'timestamp': '2025-10-01 04:25:59.112524', 'step': 10609, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:59.144377', 'step': 10609, 'epoch': 2} {'type': 'loss', 'content': 0.10945326834917068, 'timestamp': '2025-10-01 04:25:59.146992', 'step': 10610, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:59.179567', 'step': 10610, 'epoch': 2} {'type': 'loss', 'content': 0.16645948588848114, 'timestamp': '2025-10-01 04:25:59.183480', 'step': 10611, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:59.218089', 'step': 10611, 'epoch': 2} {'type': 'loss', 'content': 0.08303308486938477, 'timestamp': '2025-10-01 04:25:59.242837', 'step': 10612, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:59.275589', 'step': 10612, 'epoch': 2} {'type': 'loss', 'content': 0.1246298998594284, 'timestamp': '2025-10-01 04:25:59.277395', 'step': 10613, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:59.310286', 'step': 10613, 'epoch': 2} {'type': 'loss', 'content': 0.116522416472435, 'timestamp': '2025-10-01 04:25:59.312566', 'step': 10614, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:59.345482', 'step': 10614, 'epoch': 2} {'type': 'loss', 'content': 0.15154683589935303, 'timestamp': '2025-10-01 04:25:59.348021', 'step': 10615, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:59.380560', 'step': 10615, 'epoch': 2} {'type': 'loss', 'content': 0.11939509958028793, 'timestamp': '2025-10-01 04:25:59.404240', 'step': 10616, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:25:59.437052', 'step': 10616, 'epoch': 2} {'type': 'loss', 'content': 0.12949925661087036, 'timestamp': '2025-10-01 04:25:59.439347', 'step': 10617, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:59.474485', 'step': 10617, 'epoch': 2} {'type': 'loss', 'content': 0.11005262285470963, 'timestamp': '2025-10-01 04:25:59.476611', 'step': 10618, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:59.507593', 'step': 10618, 'epoch': 2} {'type': 'loss', 'content': 0.08311298489570618, 'timestamp': '2025-10-01 04:25:59.509800', 'step': 10619, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:59.544971', 'step': 10619, 'epoch': 2} {'type': 'loss', 'content': 0.12670348584651947, 'timestamp': '2025-10-01 04:25:59.568721', 'step': 10620, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:25:59.605334', 'step': 10620, 'epoch': 2} {'type': 'loss', 'content': 0.13907331228256226, 'timestamp': '2025-10-01 04:25:59.613662', 'step': 10621, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:59.645672', 'step': 10621, 'epoch': 2} {'type': 'loss', 'content': 0.09817032516002655, 'timestamp': '2025-10-01 04:25:59.647735', 'step': 10622, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:59.680042', 'step': 10622, 'epoch': 2} {'type': 'loss', 'content': 0.17003507912158966, 'timestamp': '2025-10-01 04:25:59.682396', 'step': 10623, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:25:59.714915', 'step': 10623, 'epoch': 2} {'type': 'loss', 'content': 0.15024694800376892, 'timestamp': '2025-10-01 04:25:59.738292', 'step': 10624, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:25:59.770055', 'step': 10624, 'epoch': 2} {'type': 'loss', 'content': 0.17419184744358063, 'timestamp': '2025-10-01 04:25:59.773062', 'step': 10625, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:25:59.804897', 'step': 10625, 'epoch': 2} {'type': 'loss', 'content': 0.10418358445167542, 'timestamp': '2025-10-01 04:25:59.820370', 'step': 10626, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:59.851131', 'step': 10626, 'epoch': 2} {'type': 'loss', 'content': 0.1845293939113617, 'timestamp': '2025-10-01 04:25:59.855077', 'step': 10627, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:25:59.886765', 'step': 10627, 'epoch': 2} {'type': 'loss', 'content': 0.20484477281570435, 'timestamp': '2025-10-01 04:25:59.911787', 'step': 10628, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:25:59.949183', 'step': 10628, 'epoch': 2} {'type': 'loss', 'content': 0.06239922717213631, 'timestamp': '2025-10-01 04:25:59.955332', 'step': 10629, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:25:59.990672', 'step': 10629, 'epoch': 2} {'type': 'loss', 'content': 0.08320986479520798, 'timestamp': '2025-10-01 04:26:00.001012', 'step': 10630, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:00.059291', 'step': 10630, 'epoch': 2} {'type': 'loss', 'content': 0.08649713546037674, 'timestamp': '2025-10-01 04:26:00.062777', 'step': 10631, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:00.110167', 'step': 10631, 'epoch': 2} {'type': 'loss', 'content': 0.15314273536205292, 'timestamp': '2025-10-01 04:26:00.139288', 'step': 10632, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:00.178950', 'step': 10632, 'epoch': 2} {'type': 'loss', 'content': 0.13636600971221924, 'timestamp': '2025-10-01 04:26:00.181175', 'step': 10633, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:00.212578', 'step': 10633, 'epoch': 2} {'type': 'loss', 'content': 0.12916313111782074, 'timestamp': '2025-10-01 04:26:00.226249', 'step': 10634, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:26:00.259610', 'step': 10634, 'epoch': 2} {'type': 'loss', 'content': 0.14038872718811035, 'timestamp': '2025-10-01 04:26:00.265358', 'step': 10635, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:00.300034', 'step': 10635, 'epoch': 2} {'type': 'loss', 'content': 0.15157537162303925, 'timestamp': '2025-10-01 04:26:00.323842', 'step': 10636, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:00.360037', 'step': 10636, 'epoch': 2} {'type': 'loss', 'content': 0.11736206710338593, 'timestamp': '2025-10-01 04:26:00.362604', 'step': 10637, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:00.404651', 'step': 10637, 'epoch': 2} {'type': 'loss', 'content': 0.15563136339187622, 'timestamp': '2025-10-01 04:26:00.407318', 'step': 10638, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:00.439115', 'step': 10638, 'epoch': 2} {'type': 'loss', 'content': 0.15112116932868958, 'timestamp': '2025-10-01 04:26:00.441519', 'step': 10639, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:00.473115', 'step': 10639, 'epoch': 2} {'type': 'loss', 'content': 0.0836515873670578, 'timestamp': '2025-10-01 04:26:00.515059', 'step': 10640, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:00.552449', 'step': 10640, 'epoch': 2} {'type': 'loss', 'content': 0.11067280173301697, 'timestamp': '2025-10-01 04:26:00.554649', 'step': 10641, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:00.597149', 'step': 10641, 'epoch': 2} {'type': 'loss', 'content': 0.06243479251861572, 'timestamp': '2025-10-01 04:26:00.599146', 'step': 10642, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:00.637299', 'step': 10642, 'epoch': 2} {'type': 'loss', 'content': 0.09173806756734848, 'timestamp': '2025-10-01 04:26:00.640786', 'step': 10643, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:00.677178', 'step': 10643, 'epoch': 2} {'type': 'loss', 'content': 0.1271762251853943, 'timestamp': '2025-10-01 04:26:00.701039', 'step': 10644, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:00.735867', 'step': 10644, 'epoch': 2} {'type': 'loss', 'content': 0.20367255806922913, 'timestamp': '2025-10-01 04:26:00.738106', 'step': 10645, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:00.769849', 'step': 10645, 'epoch': 2} {'type': 'loss', 'content': 0.0976378545165062, 'timestamp': '2025-10-01 04:26:00.771991', 'step': 10646, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:00.803905', 'step': 10646, 'epoch': 2} {'type': 'loss', 'content': 0.12497903406620026, 'timestamp': '2025-10-01 04:26:00.806037', 'step': 10647, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:00.841237', 'step': 10647, 'epoch': 2} {'type': 'loss', 'content': 0.14553041756153107, 'timestamp': '2025-10-01 04:26:00.864859', 'step': 10648, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:00.898889', 'step': 10648, 'epoch': 2} {'type': 'loss', 'content': 0.09071066975593567, 'timestamp': '2025-10-01 04:26:00.900988', 'step': 10649, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:00.937695', 'step': 10649, 'epoch': 2} {'type': 'loss', 'content': 0.05907134711742401, 'timestamp': '2025-10-01 04:26:00.942933', 'step': 10650, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:00.982133', 'step': 10650, 'epoch': 2} {'type': 'loss', 'content': 0.10895583778619766, 'timestamp': '2025-10-01 04:26:00.984890', 'step': 10651, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:01.031749', 'step': 10651, 'epoch': 2} {'type': 'loss', 'content': 0.09241453558206558, 'timestamp': '2025-10-01 04:26:01.055298', 'step': 10652, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:01.094123', 'step': 10652, 'epoch': 2} {'type': 'loss', 'content': 0.08006023615598679, 'timestamp': '2025-10-01 04:26:01.096097', 'step': 10653, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:26:01.134406', 'step': 10653, 'epoch': 2} {'type': 'loss', 'content': 0.13532495498657227, 'timestamp': '2025-10-01 04:26:01.137278', 'step': 10654, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:01.191113', 'step': 10654, 'epoch': 2} {'type': 'loss', 'content': 0.09557792544364929, 'timestamp': '2025-10-01 04:26:01.193710', 'step': 10655, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:01.233522', 'step': 10655, 'epoch': 2} {'type': 'loss', 'content': 0.0768757313489914, 'timestamp': '2025-10-01 04:26:01.257666', 'step': 10656, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:01.301741', 'step': 10656, 'epoch': 2} {'type': 'loss', 'content': 0.0447210818529129, 'timestamp': '2025-10-01 04:26:01.303912', 'step': 10657, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:26:01.335433', 'step': 10657, 'epoch': 2} {'type': 'loss', 'content': 0.06507247686386108, 'timestamp': '2025-10-01 04:26:01.337743', 'step': 10658, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:01.369346', 'step': 10658, 'epoch': 2} {'type': 'loss', 'content': 0.13316024839878082, 'timestamp': '2025-10-01 04:26:01.371915', 'step': 10659, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:01.410018', 'step': 10659, 'epoch': 2} {'type': 'loss', 'content': 0.08253931254148483, 'timestamp': '2025-10-01 04:26:01.433575', 'step': 10660, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:01.464960', 'step': 10660, 'epoch': 2} {'type': 'loss', 'content': 0.14051955938339233, 'timestamp': '2025-10-01 04:26:01.493333', 'step': 10661, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:01.530318', 'step': 10661, 'epoch': 2} {'type': 'loss', 'content': 0.14940492808818817, 'timestamp': '2025-10-01 04:26:01.532405', 'step': 10662, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:01.570055', 'step': 10662, 'epoch': 2} {'type': 'loss', 'content': 0.16526132822036743, 'timestamp': '2025-10-01 04:26:01.572347', 'step': 10663, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:01.604243', 'step': 10663, 'epoch': 2} {'type': 'loss', 'content': 0.19280080497264862, 'timestamp': '2025-10-01 04:26:01.628290', 'step': 10664, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:01.660623', 'step': 10664, 'epoch': 2} {'type': 'loss', 'content': 0.07780209183692932, 'timestamp': '2025-10-01 04:26:01.662661', 'step': 10665, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:01.699856', 'step': 10665, 'epoch': 2} {'type': 'loss', 'content': 0.1060858815908432, 'timestamp': '2025-10-01 04:26:01.702023', 'step': 10666, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:26:01.744118', 'step': 10666, 'epoch': 2} {'type': 'loss', 'content': 0.13962218165397644, 'timestamp': '2025-10-01 04:26:01.747022', 'step': 10667, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:26:01.784656', 'step': 10667, 'epoch': 2} {'type': 'loss', 'content': 0.07488570362329483, 'timestamp': '2025-10-01 04:26:01.815338', 'step': 10668, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:01.851533', 'step': 10668, 'epoch': 2} {'type': 'loss', 'content': 0.06858570873737335, 'timestamp': '2025-10-01 04:26:01.853893', 'step': 10669, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:01.892166', 'step': 10669, 'epoch': 2} {'type': 'loss', 'content': 0.15124328434467316, 'timestamp': '2025-10-01 04:26:01.894292', 'step': 10670, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:01.931585', 'step': 10670, 'epoch': 2} {'type': 'loss', 'content': 0.12724602222442627, 'timestamp': '2025-10-01 04:26:01.934379', 'step': 10671, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:01.969453', 'step': 10671, 'epoch': 2} {'type': 'loss', 'content': 0.05686783418059349, 'timestamp': '2025-10-01 04:26:01.993188', 'step': 10672, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:02.029946', 'step': 10672, 'epoch': 2} {'type': 'loss', 'content': 0.11268065869808197, 'timestamp': '2025-10-01 04:26:02.032136', 'step': 10673, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:02.062924', 'step': 10673, 'epoch': 2} {'type': 'loss', 'content': 0.09913304448127747, 'timestamp': '2025-10-01 04:26:02.065444', 'step': 10674, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:02.097799', 'step': 10674, 'epoch': 2} {'type': 'loss', 'content': 0.1298275738954544, 'timestamp': '2025-10-01 04:26:02.100107', 'step': 10675, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:02.143711', 'step': 10675, 'epoch': 2} {'type': 'loss', 'content': 0.11568140238523483, 'timestamp': '2025-10-01 04:26:02.167373', 'step': 10676, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:02.198173', 'step': 10676, 'epoch': 2} {'type': 'loss', 'content': 0.18716777861118317, 'timestamp': '2025-10-01 04:26:02.200842', 'step': 10677, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:26:02.233694', 'step': 10677, 'epoch': 2} {'type': 'loss', 'content': 0.09276735782623291, 'timestamp': '2025-10-01 04:26:02.236211', 'step': 10678, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:02.274006', 'step': 10678, 'epoch': 2} {'type': 'loss', 'content': 0.1223980113863945, 'timestamp': '2025-10-01 04:26:02.276513', 'step': 10679, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:02.310608', 'step': 10679, 'epoch': 2} {'type': 'loss', 'content': 0.10006403923034668, 'timestamp': '2025-10-01 04:26:02.334564', 'step': 10680, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:02.377071', 'step': 10680, 'epoch': 2} {'type': 'loss', 'content': 0.15473391115665436, 'timestamp': '2025-10-01 04:26:02.379323', 'step': 10681, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:02.417660', 'step': 10681, 'epoch': 2} {'type': 'loss', 'content': 0.11696769297122955, 'timestamp': '2025-10-01 04:26:02.419787', 'step': 10682, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:02.453024', 'step': 10682, 'epoch': 2} {'type': 'loss', 'content': 0.15981316566467285, 'timestamp': '2025-10-01 04:26:02.455454', 'step': 10683, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-10-01 04:26:02.491299', 'step': 10683, 'epoch': 2} {'type': 'loss', 'content': 0.1351940780878067, 'timestamp': '2025-10-01 04:26:02.517003', 'step': 10684, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:26:02.549213', 'step': 10684, 'epoch': 2} {'type': 'loss', 'content': 0.07393321394920349, 'timestamp': '2025-10-01 04:26:02.551403', 'step': 10685, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:02.583521', 'step': 10685, 'epoch': 2} {'type': 'loss', 'content': 0.07333928346633911, 'timestamp': '2025-10-01 04:26:02.585740', 'step': 10686, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:02.631279', 'step': 10686, 'epoch': 2} {'type': 'loss', 'content': 0.16201195120811462, 'timestamp': '2025-10-01 04:26:02.633690', 'step': 10687, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:02.665836', 'step': 10687, 'epoch': 2} {'type': 'loss', 'content': 0.18941479921340942, 'timestamp': '2025-10-01 04:26:02.689437', 'step': 10688, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:02.722233', 'step': 10688, 'epoch': 2} {'type': 'loss', 'content': 0.10630027949810028, 'timestamp': '2025-10-01 04:26:02.733319', 'step': 10689, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:02.775577', 'step': 10689, 'epoch': 2} {'type': 'loss', 'content': 0.11979185789823532, 'timestamp': '2025-10-01 04:26:02.777904', 'step': 10690, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:26:02.819813', 'step': 10690, 'epoch': 2} {'type': 'loss', 'content': 0.09240605682134628, 'timestamp': '2025-10-01 04:26:02.824150', 'step': 10691, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:02.855690', 'step': 10691, 'epoch': 2} {'type': 'loss', 'content': 0.13641773164272308, 'timestamp': '2025-10-01 04:26:02.879576', 'step': 10692, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:26:02.910569', 'step': 10692, 'epoch': 2} {'type': 'loss', 'content': 0.05328545346856117, 'timestamp': '2025-10-01 04:26:02.913082', 'step': 10693, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:02.954140', 'step': 10693, 'epoch': 2} {'type': 'loss', 'content': 0.07672307640314102, 'timestamp': '2025-10-01 04:26:02.957297', 'step': 10694, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:26:02.989412', 'step': 10694, 'epoch': 2} {'type': 'loss', 'content': 0.20343182981014252, 'timestamp': '2025-10-01 04:26:02.992163', 'step': 10695, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:03.024062', 'step': 10695, 'epoch': 2} {'type': 'loss', 'content': 0.08892717957496643, 'timestamp': '2025-10-01 04:26:03.047741', 'step': 10696, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:03.078801', 'step': 10696, 'epoch': 2} {'type': 'loss', 'content': 0.1411164402961731, 'timestamp': '2025-10-01 04:26:03.081129', 'step': 10697, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:03.111554', 'step': 10697, 'epoch': 2} {'type': 'loss', 'content': 0.1430356800556183, 'timestamp': '2025-10-01 04:26:03.114797', 'step': 10698, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:03.146850', 'step': 10698, 'epoch': 2} {'type': 'loss', 'content': 0.08414620906114578, 'timestamp': '2025-10-01 04:26:03.149159', 'step': 10699, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:03.179711', 'step': 10699, 'epoch': 2} {'type': 'loss', 'content': 0.09967885166406631, 'timestamp': '2025-10-01 04:26:03.204038', 'step': 10700, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:03.235519', 'step': 10700, 'epoch': 2} {'type': 'loss', 'content': 0.10155379772186279, 'timestamp': '2025-10-01 04:26:03.237605', 'step': 10701, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:03.268234', 'step': 10701, 'epoch': 2} {'type': 'loss', 'content': 0.16288737952709198, 'timestamp': '2025-10-01 04:26:03.270471', 'step': 10702, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:03.301204', 'step': 10702, 'epoch': 2} {'type': 'loss', 'content': 0.06877431273460388, 'timestamp': '2025-10-01 04:26:03.304967', 'step': 10703, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:03.336232', 'step': 10703, 'epoch': 2} {'type': 'loss', 'content': 0.15603704750537872, 'timestamp': '2025-10-01 04:26:03.360006', 'step': 10704, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:03.390979', 'step': 10704, 'epoch': 2} {'type': 'loss', 'content': 0.07376635819673538, 'timestamp': '2025-10-01 04:26:03.393392', 'step': 10705, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:03.424208', 'step': 10705, 'epoch': 2} {'type': 'loss', 'content': 0.17722660303115845, 'timestamp': '2025-10-01 04:26:03.427626', 'step': 10706, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:03.459154', 'step': 10706, 'epoch': 2} {'type': 'loss', 'content': 0.03294361010193825, 'timestamp': '2025-10-01 04:26:03.461821', 'step': 10707, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:03.492851', 'step': 10707, 'epoch': 2} {'type': 'loss', 'content': 0.18011176586151123, 'timestamp': '2025-10-01 04:26:03.517992', 'step': 10708, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:03.548880', 'step': 10708, 'epoch': 2} {'type': 'loss', 'content': 0.1607111096382141, 'timestamp': '2025-10-01 04:26:03.552709', 'step': 10709, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:03.583212', 'step': 10709, 'epoch': 2} {'type': 'loss', 'content': 0.1006445437669754, 'timestamp': '2025-10-01 04:26:03.598157', 'step': 10710, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:03.629053', 'step': 10710, 'epoch': 2} {'type': 'loss', 'content': 0.08251485973596573, 'timestamp': '2025-10-01 04:26:03.631633', 'step': 10711, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:03.663436', 'step': 10711, 'epoch': 2} {'type': 'loss', 'content': 0.08462723344564438, 'timestamp': '2025-10-01 04:26:03.689848', 'step': 10712, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:26:03.722167', 'step': 10712, 'epoch': 2} {'type': 'loss', 'content': 0.08301671594381332, 'timestamp': '2025-10-01 04:26:03.724743', 'step': 10713, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:03.756506', 'step': 10713, 'epoch': 2} {'type': 'loss', 'content': 0.09252359718084335, 'timestamp': '2025-10-01 04:26:03.759393', 'step': 10714, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:03.808124', 'step': 10714, 'epoch': 2} {'type': 'loss', 'content': 0.09021202474832535, 'timestamp': '2025-10-01 04:26:03.810565', 'step': 10715, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:03.848917', 'step': 10715, 'epoch': 2} {'type': 'loss', 'content': 0.07589466869831085, 'timestamp': '2025-10-01 04:26:03.883641', 'step': 10716, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:03.917523', 'step': 10716, 'epoch': 2} {'type': 'loss', 'content': 0.11620240658521652, 'timestamp': '2025-10-01 04:26:03.929445', 'step': 10717, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:03.961681', 'step': 10717, 'epoch': 2} {'type': 'loss', 'content': 0.09794435650110245, 'timestamp': '2025-10-01 04:26:03.964568', 'step': 10718, 'epoch': 2} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 949202279808}], 'timestamp': '2025-10-01 04:26:12.255908', 'step': 10718, 'epoch': 2} {'type': 'pplx', 'content': 12263.344871591431, 'timestamp': '2025-10-01 04:26:12.260219', 'step': 10718, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:12.296483', 'step': 10718, 'epoch': 2} {'type': 'loss', 'content': 0.14570418000221252, 'timestamp': '2025-10-01 04:26:12.298699', 'step': 10719, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:12.329452', 'step': 10719, 'epoch': 2} {'type': 'loss', 'content': 0.07326631247997284, 'timestamp': '2025-10-01 04:26:12.353074', 'step': 10720, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:12.383126', 'step': 10720, 'epoch': 2} {'type': 'loss', 'content': 0.11610976606607437, 'timestamp': '2025-10-01 04:26:12.385198', 'step': 10721, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:12.417243', 'step': 10721, 'epoch': 2} {'type': 'loss', 'content': 0.0352771021425724, 'timestamp': '2025-10-01 04:26:12.419471', 'step': 10722, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:12.450325', 'step': 10722, 'epoch': 2} {'type': 'loss', 'content': 0.12211864441633224, 'timestamp': '2025-10-01 04:26:12.461235', 'step': 10723, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:26:12.493720', 'step': 10723, 'epoch': 2} {'type': 'loss', 'content': 0.09739254415035248, 'timestamp': '2025-10-01 04:26:12.518935', 'step': 10724, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:26:12.549799', 'step': 10724, 'epoch': 2} {'type': 'loss', 'content': 0.12104681879281998, 'timestamp': '2025-10-01 04:26:12.557692', 'step': 10725, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:12.589168', 'step': 10725, 'epoch': 2} {'type': 'loss', 'content': 0.20785236358642578, 'timestamp': '2025-10-01 04:26:12.591578', 'step': 10726, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:12.624523', 'step': 10726, 'epoch': 2} {'type': 'loss', 'content': 0.19407068192958832, 'timestamp': '2025-10-01 04:26:12.626755', 'step': 10727, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:12.659571', 'step': 10727, 'epoch': 2} {'type': 'loss', 'content': 0.10544038563966751, 'timestamp': '2025-10-01 04:26:12.683695', 'step': 10728, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:12.715429', 'step': 10728, 'epoch': 2} {'type': 'loss', 'content': 0.1280948668718338, 'timestamp': '2025-10-01 04:26:12.717676', 'step': 10729, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:12.749843', 'step': 10729, 'epoch': 2} {'type': 'loss', 'content': 0.08165081590414047, 'timestamp': '2025-10-01 04:26:12.752482', 'step': 10730, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:12.787367', 'step': 10730, 'epoch': 2} {'type': 'loss', 'content': 0.09838928282260895, 'timestamp': '2025-10-01 04:26:12.791280', 'step': 10731, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:12.833332', 'step': 10731, 'epoch': 2} {'type': 'loss', 'content': 0.03351305425167084, 'timestamp': '2025-10-01 04:26:12.870853', 'step': 10732, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:12.918500', 'step': 10732, 'epoch': 2} {'type': 'loss', 'content': 0.10345069319009781, 'timestamp': '2025-10-01 04:26:12.920913', 'step': 10733, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:12.952593', 'step': 10733, 'epoch': 2} {'type': 'loss', 'content': 0.13313312828540802, 'timestamp': '2025-10-01 04:26:12.955004', 'step': 10734, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:12.986958', 'step': 10734, 'epoch': 2} {'type': 'loss', 'content': 0.04473284259438515, 'timestamp': '2025-10-01 04:26:12.989063', 'step': 10735, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:13.019979', 'step': 10735, 'epoch': 2} {'type': 'loss', 'content': 0.13856640458106995, 'timestamp': '2025-10-01 04:26:13.043802', 'step': 10736, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:13.074414', 'step': 10736, 'epoch': 2} {'type': 'loss', 'content': 0.1332893818616867, 'timestamp': '2025-10-01 04:26:13.076865', 'step': 10737, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:13.110635', 'step': 10737, 'epoch': 2} {'type': 'loss', 'content': 0.068208247423172, 'timestamp': '2025-10-01 04:26:13.114317', 'step': 10738, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:13.144794', 'step': 10738, 'epoch': 2} {'type': 'loss', 'content': 0.17990092933177948, 'timestamp': '2025-10-01 04:26:13.147433', 'step': 10739, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:13.178540', 'step': 10739, 'epoch': 2} {'type': 'loss', 'content': 0.11641191691160202, 'timestamp': '2025-10-01 04:26:13.202571', 'step': 10740, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:26:13.241236', 'step': 10740, 'epoch': 2} {'type': 'loss', 'content': 0.06888130307197571, 'timestamp': '2025-10-01 04:26:13.243446', 'step': 10741, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:13.283574', 'step': 10741, 'epoch': 2} {'type': 'loss', 'content': 0.05607076361775398, 'timestamp': '2025-10-01 04:26:13.286347', 'step': 10742, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:13.318173', 'step': 10742, 'epoch': 2} {'type': 'loss', 'content': 0.14627650380134583, 'timestamp': '2025-10-01 04:26:13.321053', 'step': 10743, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:13.352158', 'step': 10743, 'epoch': 2} {'type': 'loss', 'content': 0.09244098514318466, 'timestamp': '2025-10-01 04:26:13.384070', 'step': 10744, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:13.422367', 'step': 10744, 'epoch': 2} {'type': 'loss', 'content': 0.20561937987804413, 'timestamp': '2025-10-01 04:26:13.424975', 'step': 10745, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:13.456117', 'step': 10745, 'epoch': 2} {'type': 'loss', 'content': 0.08516153693199158, 'timestamp': '2025-10-01 04:26:13.458670', 'step': 10746, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:13.490018', 'step': 10746, 'epoch': 2} {'type': 'loss', 'content': 0.11534757167100906, 'timestamp': '2025-10-01 04:26:13.492543', 'step': 10747, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:13.523557', 'step': 10747, 'epoch': 2} {'type': 'loss', 'content': 0.15463285148143768, 'timestamp': '2025-10-01 04:26:13.547646', 'step': 10748, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:26:13.584381', 'step': 10748, 'epoch': 2} {'type': 'loss', 'content': 0.14514972269535065, 'timestamp': '2025-10-01 04:26:13.586999', 'step': 10749, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:13.618221', 'step': 10749, 'epoch': 2} {'type': 'loss', 'content': 0.19018042087554932, 'timestamp': '2025-10-01 04:26:13.620471', 'step': 10750, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:13.651518', 'step': 10750, 'epoch': 2} {'type': 'loss', 'content': 0.12474095821380615, 'timestamp': '2025-10-01 04:26:13.653765', 'step': 10751, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:13.684069', 'step': 10751, 'epoch': 2} {'type': 'loss', 'content': 0.021501081064343452, 'timestamp': '2025-10-01 04:26:13.707879', 'step': 10752, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:13.742521', 'step': 10752, 'epoch': 2} {'type': 'loss', 'content': 0.18306951224803925, 'timestamp': '2025-10-01 04:26:13.744641', 'step': 10753, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:13.776653', 'step': 10753, 'epoch': 2} {'type': 'loss', 'content': 0.16389064490795135, 'timestamp': '2025-10-01 04:26:13.778866', 'step': 10754, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:13.812420', 'step': 10754, 'epoch': 2} {'type': 'loss', 'content': 0.08310263603925705, 'timestamp': '2025-10-01 04:26:13.815137', 'step': 10755, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:26:13.847795', 'step': 10755, 'epoch': 2} {'type': 'loss', 'content': 0.0949520394206047, 'timestamp': '2025-10-01 04:26:13.871574', 'step': 10756, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:13.903603', 'step': 10756, 'epoch': 2} {'type': 'loss', 'content': 0.09545648843050003, 'timestamp': '2025-10-01 04:26:13.905774', 'step': 10757, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:13.962311', 'step': 10757, 'epoch': 2} {'type': 'loss', 'content': 0.10766685754060745, 'timestamp': '2025-10-01 04:26:13.965469', 'step': 10758, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:13.997286', 'step': 10758, 'epoch': 2} {'type': 'loss', 'content': 0.04350825026631355, 'timestamp': '2025-10-01 04:26:13.999780', 'step': 10759, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:14.030398', 'step': 10759, 'epoch': 2} {'type': 'loss', 'content': 0.11020851135253906, 'timestamp': '2025-10-01 04:26:14.054268', 'step': 10760, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:14.086304', 'step': 10760, 'epoch': 2} {'type': 'loss', 'content': 0.0630987286567688, 'timestamp': '2025-10-01 04:26:14.088803', 'step': 10761, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:14.123369', 'step': 10761, 'epoch': 2} {'type': 'loss', 'content': 0.19741126894950867, 'timestamp': '2025-10-01 04:26:14.125441', 'step': 10762, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:14.156072', 'step': 10762, 'epoch': 2} {'type': 'loss', 'content': 0.10613738000392914, 'timestamp': '2025-10-01 04:26:14.158288', 'step': 10763, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:14.188726', 'step': 10763, 'epoch': 2} {'type': 'loss', 'content': 0.06865273416042328, 'timestamp': '2025-10-01 04:26:14.212372', 'step': 10764, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:26:14.243146', 'step': 10764, 'epoch': 2} {'type': 'loss', 'content': 0.17743133008480072, 'timestamp': '2025-10-01 04:26:14.245750', 'step': 10765, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:14.277817', 'step': 10765, 'epoch': 2} {'type': 'loss', 'content': 0.06533797830343246, 'timestamp': '2025-10-01 04:26:14.279938', 'step': 10766, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:14.313122', 'step': 10766, 'epoch': 2} {'type': 'loss', 'content': 0.1292497217655182, 'timestamp': '2025-10-01 04:26:14.315243', 'step': 10767, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:14.350013', 'step': 10767, 'epoch': 2} {'type': 'loss', 'content': 0.10920509696006775, 'timestamp': '2025-10-01 04:26:14.374844', 'step': 10768, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:14.405334', 'step': 10768, 'epoch': 2} {'type': 'loss', 'content': 0.13455937802791595, 'timestamp': '2025-10-01 04:26:14.411860', 'step': 10769, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:14.445074', 'step': 10769, 'epoch': 2} {'type': 'loss', 'content': 0.09556487947702408, 'timestamp': '2025-10-01 04:26:14.447451', 'step': 10770, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:14.479535', 'step': 10770, 'epoch': 2} {'type': 'loss', 'content': 0.15264539420604706, 'timestamp': '2025-10-01 04:26:14.481582', 'step': 10771, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:14.511878', 'step': 10771, 'epoch': 2} {'type': 'loss', 'content': 0.09067539870738983, 'timestamp': '2025-10-01 04:26:14.535500', 'step': 10772, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:14.566786', 'step': 10772, 'epoch': 2} {'type': 'loss', 'content': 0.06989439576864243, 'timestamp': '2025-10-01 04:26:14.569066', 'step': 10773, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:26:14.600483', 'step': 10773, 'epoch': 2} {'type': 'loss', 'content': 0.099424809217453, 'timestamp': '2025-10-01 04:26:14.603997', 'step': 10774, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:26:14.635588', 'step': 10774, 'epoch': 2} {'type': 'loss', 'content': 0.11275872588157654, 'timestamp': '2025-10-01 04:26:14.638066', 'step': 10775, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:14.669935', 'step': 10775, 'epoch': 2} {'type': 'loss', 'content': 0.08553128689527512, 'timestamp': '2025-10-01 04:26:14.693617', 'step': 10776, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:26:14.726158', 'step': 10776, 'epoch': 2} {'type': 'loss', 'content': 0.07685476541519165, 'timestamp': '2025-10-01 04:26:14.728015', 'step': 10777, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:14.762753', 'step': 10777, 'epoch': 2} {'type': 'loss', 'content': 0.11302777379751205, 'timestamp': '2025-10-01 04:26:14.765662', 'step': 10778, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:14.800045', 'step': 10778, 'epoch': 2} {'type': 'loss', 'content': 0.13466870784759521, 'timestamp': '2025-10-01 04:26:14.802654', 'step': 10779, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:14.834990', 'step': 10779, 'epoch': 2} {'type': 'loss', 'content': 0.08522938191890717, 'timestamp': '2025-10-01 04:26:14.858732', 'step': 10780, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:14.895203', 'step': 10780, 'epoch': 2} {'type': 'loss', 'content': 0.1455061137676239, 'timestamp': '2025-10-01 04:26:14.907118', 'step': 10781, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:14.939550', 'step': 10781, 'epoch': 2} {'type': 'loss', 'content': 0.06859935820102692, 'timestamp': '2025-10-01 04:26:14.941749', 'step': 10782, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:26:14.974139', 'step': 10782, 'epoch': 2} {'type': 'loss', 'content': 0.044458843767642975, 'timestamp': '2025-10-01 04:26:14.978305', 'step': 10783, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:15.022899', 'step': 10783, 'epoch': 2} {'type': 'loss', 'content': 0.0694909393787384, 'timestamp': '2025-10-01 04:26:15.046709', 'step': 10784, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:15.079785', 'step': 10784, 'epoch': 2} {'type': 'loss', 'content': 0.23249585926532745, 'timestamp': '2025-10-01 04:26:15.081771', 'step': 10785, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:15.111880', 'step': 10785, 'epoch': 2} {'type': 'loss', 'content': 0.09192866086959839, 'timestamp': '2025-10-01 04:26:15.114602', 'step': 10786, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:15.150138', 'step': 10786, 'epoch': 2} {'type': 'loss', 'content': 0.10574261099100113, 'timestamp': '2025-10-01 04:26:15.152025', 'step': 10787, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-10-01 04:26:15.186849', 'step': 10787, 'epoch': 2} {'type': 'loss', 'content': 0.15182442963123322, 'timestamp': '2025-10-01 04:26:15.212693', 'step': 10788, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:15.255674', 'step': 10788, 'epoch': 2} {'type': 'loss', 'content': 0.15510323643684387, 'timestamp': '2025-10-01 04:26:15.261272', 'step': 10789, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:26:15.304544', 'step': 10789, 'epoch': 2} {'type': 'loss', 'content': 0.09571867436170578, 'timestamp': '2025-10-01 04:26:15.306890', 'step': 10790, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:26:15.338202', 'step': 10790, 'epoch': 2} {'type': 'loss', 'content': 0.07389392703771591, 'timestamp': '2025-10-01 04:26:15.340689', 'step': 10791, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:15.371611', 'step': 10791, 'epoch': 2} {'type': 'loss', 'content': 0.11741496622562408, 'timestamp': '2025-10-01 04:26:15.395117', 'step': 10792, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:15.426669', 'step': 10792, 'epoch': 2} {'type': 'loss', 'content': 0.05372050404548645, 'timestamp': '2025-10-01 04:26:15.428969', 'step': 10793, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:15.460715', 'step': 10793, 'epoch': 2} {'type': 'loss', 'content': 0.14126962423324585, 'timestamp': '2025-10-01 04:26:15.462922', 'step': 10794, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:26:15.492999', 'step': 10794, 'epoch': 2} {'type': 'loss', 'content': 0.041860271245241165, 'timestamp': '2025-10-01 04:26:15.495383', 'step': 10795, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:15.526090', 'step': 10795, 'epoch': 2} {'type': 'loss', 'content': 0.1562800407409668, 'timestamp': '2025-10-01 04:26:15.549628', 'step': 10796, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:15.581495', 'step': 10796, 'epoch': 2} {'type': 'loss', 'content': 0.089131660759449, 'timestamp': '2025-10-01 04:26:15.583703', 'step': 10797, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:15.621255', 'step': 10797, 'epoch': 2} {'type': 'loss', 'content': 0.08701235800981522, 'timestamp': '2025-10-01 04:26:15.623420', 'step': 10798, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:15.653763', 'step': 10798, 'epoch': 2} {'type': 'loss', 'content': 0.17097346484661102, 'timestamp': '2025-10-01 04:26:15.655897', 'step': 10799, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 12814563338304}, 'timestamp': '2025-10-01 04:26:15.695067', 'step': 10799, 'epoch': 2} {'type': 'loss', 'content': 0.09415289014577866, 'timestamp': '2025-10-01 04:26:15.732289', 'step': 10800, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:15.762001', 'step': 10800, 'epoch': 2} {'type': 'loss', 'content': 0.13851654529571533, 'timestamp': '2025-10-01 04:26:15.764094', 'step': 10801, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:15.806705', 'step': 10801, 'epoch': 2} {'type': 'loss', 'content': 0.20022861659526825, 'timestamp': '2025-10-01 04:26:15.811920', 'step': 10802, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:15.842255', 'step': 10802, 'epoch': 2} {'type': 'loss', 'content': 0.14127172529697418, 'timestamp': '2025-10-01 04:26:15.844617', 'step': 10803, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:15.876750', 'step': 10803, 'epoch': 2} {'type': 'loss', 'content': 0.18644234538078308, 'timestamp': '2025-10-01 04:26:15.900220', 'step': 10804, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:15.930535', 'step': 10804, 'epoch': 2} {'type': 'loss', 'content': 0.0857589840888977, 'timestamp': '2025-10-01 04:26:15.932591', 'step': 10805, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:15.964654', 'step': 10805, 'epoch': 2} {'type': 'loss', 'content': 0.12152273952960968, 'timestamp': '2025-10-01 04:26:15.966753', 'step': 10806, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:16.003540', 'step': 10806, 'epoch': 2} {'type': 'loss', 'content': 0.09939118474721909, 'timestamp': '2025-10-01 04:26:16.007508', 'step': 10807, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:16.038438', 'step': 10807, 'epoch': 2} {'type': 'loss', 'content': 0.09126181155443192, 'timestamp': '2025-10-01 04:26:16.062000', 'step': 10808, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:16.092254', 'step': 10808, 'epoch': 2} {'type': 'loss', 'content': 0.14743807911872864, 'timestamp': '2025-10-01 04:26:16.094570', 'step': 10809, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:16.125320', 'step': 10809, 'epoch': 2} {'type': 'loss', 'content': 0.06927099078893661, 'timestamp': '2025-10-01 04:26:16.127983', 'step': 10810, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:16.158142', 'step': 10810, 'epoch': 2} {'type': 'loss', 'content': 0.08554516732692719, 'timestamp': '2025-10-01 04:26:16.160217', 'step': 10811, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:16.189802', 'step': 10811, 'epoch': 2} {'type': 'loss', 'content': 0.08848113566637039, 'timestamp': '2025-10-01 04:26:16.213366', 'step': 10812, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:26:16.244517', 'step': 10812, 'epoch': 2} {'type': 'loss', 'content': 0.07092903554439545, 'timestamp': '2025-10-01 04:26:16.246546', 'step': 10813, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:16.277634', 'step': 10813, 'epoch': 2} {'type': 'loss', 'content': 0.12468443065881729, 'timestamp': '2025-10-01 04:26:16.279729', 'step': 10814, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:16.309928', 'step': 10814, 'epoch': 2} {'type': 'loss', 'content': 0.16243985295295715, 'timestamp': '2025-10-01 04:26:16.311990', 'step': 10815, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:26:16.342376', 'step': 10815, 'epoch': 2} {'type': 'loss', 'content': 0.12198695540428162, 'timestamp': '2025-10-01 04:26:16.366128', 'step': 10816, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:16.405181', 'step': 10816, 'epoch': 2} {'type': 'loss', 'content': 0.10178276151418686, 'timestamp': '2025-10-01 04:26:16.417520', 'step': 10817, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:26:16.449083', 'step': 10817, 'epoch': 2} {'type': 'loss', 'content': 0.06747611612081528, 'timestamp': '2025-10-01 04:26:16.453092', 'step': 10818, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:26:16.499498', 'step': 10818, 'epoch': 2} {'type': 'loss', 'content': 0.06467585265636444, 'timestamp': '2025-10-01 04:26:16.501858', 'step': 10819, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:16.533049', 'step': 10819, 'epoch': 2} {'type': 'loss', 'content': 0.14998169243335724, 'timestamp': '2025-10-01 04:26:16.556779', 'step': 10820, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:16.587154', 'step': 10820, 'epoch': 2} {'type': 'loss', 'content': 0.1382097601890564, 'timestamp': '2025-10-01 04:26:16.589269', 'step': 10821, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:16.619546', 'step': 10821, 'epoch': 2} {'type': 'loss', 'content': 0.14021646976470947, 'timestamp': '2025-10-01 04:26:16.621673', 'step': 10822, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:16.651606', 'step': 10822, 'epoch': 2} {'type': 'loss', 'content': 0.035886041820049286, 'timestamp': '2025-10-01 04:26:16.654479', 'step': 10823, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:16.684864', 'step': 10823, 'epoch': 2} {'type': 'loss', 'content': 0.08032827079296112, 'timestamp': '2025-10-01 04:26:16.708400', 'step': 10824, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:16.738595', 'step': 10824, 'epoch': 2} {'type': 'loss', 'content': 0.20696906745433807, 'timestamp': '2025-10-01 04:26:16.740684', 'step': 10825, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:16.771693', 'step': 10825, 'epoch': 2} {'type': 'loss', 'content': 0.04874107241630554, 'timestamp': '2025-10-01 04:26:16.773580', 'step': 10826, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:16.804929', 'step': 10826, 'epoch': 2} {'type': 'loss', 'content': 0.13484744727611542, 'timestamp': '2025-10-01 04:26:16.806992', 'step': 10827, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:26:16.837634', 'step': 10827, 'epoch': 2} {'type': 'loss', 'content': 0.08188043534755707, 'timestamp': '2025-10-01 04:26:16.863056', 'step': 10828, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:16.894126', 'step': 10828, 'epoch': 2} {'type': 'loss', 'content': 0.13400106132030487, 'timestamp': '2025-10-01 04:26:16.896258', 'step': 10829, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:16.926469', 'step': 10829, 'epoch': 2} {'type': 'loss', 'content': 0.07806050032377243, 'timestamp': '2025-10-01 04:26:16.928554', 'step': 10830, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:16.959059', 'step': 10830, 'epoch': 2} {'type': 'loss', 'content': 0.14336541295051575, 'timestamp': '2025-10-01 04:26:16.961306', 'step': 10831, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:16.997505', 'step': 10831, 'epoch': 2} {'type': 'loss', 'content': 0.10072100162506104, 'timestamp': '2025-10-01 04:26:17.021179', 'step': 10832, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:17.051816', 'step': 10832, 'epoch': 2} {'type': 'loss', 'content': 0.08778326958417892, 'timestamp': '2025-10-01 04:26:17.053660', 'step': 10833, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:26:17.084081', 'step': 10833, 'epoch': 2} {'type': 'loss', 'content': 0.136320561170578, 'timestamp': '2025-10-01 04:26:17.086772', 'step': 10834, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:17.117236', 'step': 10834, 'epoch': 2} {'type': 'loss', 'content': 0.06602833420038223, 'timestamp': '2025-10-01 04:26:17.119623', 'step': 10835, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:17.154791', 'step': 10835, 'epoch': 2} {'type': 'loss', 'content': 0.1613817811012268, 'timestamp': '2025-10-01 04:26:17.178904', 'step': 10836, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:17.210187', 'step': 10836, 'epoch': 2} {'type': 'loss', 'content': 0.08310174942016602, 'timestamp': '2025-10-01 04:26:17.212640', 'step': 10837, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:17.250809', 'step': 10837, 'epoch': 2} {'type': 'loss', 'content': 0.11225829273462296, 'timestamp': '2025-10-01 04:26:17.253083', 'step': 10838, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:17.286128', 'step': 10838, 'epoch': 2} {'type': 'loss', 'content': 0.09781912714242935, 'timestamp': '2025-10-01 04:26:17.302535', 'step': 10839, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:17.340626', 'step': 10839, 'epoch': 2} {'type': 'loss', 'content': 0.13228580355644226, 'timestamp': '2025-10-01 04:26:17.369068', 'step': 10840, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:17.400723', 'step': 10840, 'epoch': 2} {'type': 'loss', 'content': 0.10359055548906326, 'timestamp': '2025-10-01 04:26:17.403143', 'step': 10841, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:17.434256', 'step': 10841, 'epoch': 2} {'type': 'loss', 'content': 0.15687231719493866, 'timestamp': '2025-10-01 04:26:17.436613', 'step': 10842, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:17.474794', 'step': 10842, 'epoch': 2} {'type': 'loss', 'content': 0.1424727737903595, 'timestamp': '2025-10-01 04:26:17.477071', 'step': 10843, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:17.507927', 'step': 10843, 'epoch': 2} {'type': 'loss', 'content': 0.05489419773221016, 'timestamp': '2025-10-01 04:26:17.531714', 'step': 10844, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:17.562143', 'step': 10844, 'epoch': 2} {'type': 'loss', 'content': 0.13413794338703156, 'timestamp': '2025-10-01 04:26:17.564606', 'step': 10845, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:17.596805', 'step': 10845, 'epoch': 2} {'type': 'loss', 'content': 0.13084644079208374, 'timestamp': '2025-10-01 04:26:17.599398', 'step': 10846, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:17.630305', 'step': 10846, 'epoch': 2} {'type': 'loss', 'content': 0.08662804961204529, 'timestamp': '2025-10-01 04:26:17.632472', 'step': 10847, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:17.663378', 'step': 10847, 'epoch': 2} {'type': 'loss', 'content': 0.06427941471338272, 'timestamp': '2025-10-01 04:26:17.688227', 'step': 10848, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:17.719477', 'step': 10848, 'epoch': 2} {'type': 'loss', 'content': 0.12209925800561905, 'timestamp': '2025-10-01 04:26:17.722394', 'step': 10849, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:17.753164', 'step': 10849, 'epoch': 2} {'type': 'loss', 'content': 0.1320098489522934, 'timestamp': '2025-10-01 04:26:17.755508', 'step': 10850, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:17.788100', 'step': 10850, 'epoch': 2} {'type': 'loss', 'content': 0.1517714411020279, 'timestamp': '2025-10-01 04:26:17.790370', 'step': 10851, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:17.833461', 'step': 10851, 'epoch': 2} {'type': 'loss', 'content': 0.1082744300365448, 'timestamp': '2025-10-01 04:26:17.857261', 'step': 10852, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:17.888004', 'step': 10852, 'epoch': 2} {'type': 'loss', 'content': 0.07905161380767822, 'timestamp': '2025-10-01 04:26:17.890292', 'step': 10853, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:17.921494', 'step': 10853, 'epoch': 2} {'type': 'loss', 'content': 0.08268337696790695, 'timestamp': '2025-10-01 04:26:17.929876', 'step': 10854, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:17.961889', 'step': 10854, 'epoch': 2} {'type': 'loss', 'content': 0.047850415110588074, 'timestamp': '2025-10-01 04:26:17.964113', 'step': 10855, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:18.005859', 'step': 10855, 'epoch': 2} {'type': 'loss', 'content': 0.13935460150241852, 'timestamp': '2025-10-01 04:26:18.029682', 'step': 10856, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:18.060166', 'step': 10856, 'epoch': 2} {'type': 'loss', 'content': 0.13937801122665405, 'timestamp': '2025-10-01 04:26:18.062644', 'step': 10857, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:18.095508', 'step': 10857, 'epoch': 2} {'type': 'loss', 'content': 0.13915902376174927, 'timestamp': '2025-10-01 04:26:18.097897', 'step': 10858, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:18.129084', 'step': 10858, 'epoch': 2} {'type': 'loss', 'content': 0.15842154622077942, 'timestamp': '2025-10-01 04:26:18.131376', 'step': 10859, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:18.161510', 'step': 10859, 'epoch': 2} {'type': 'loss', 'content': 0.041742175817489624, 'timestamp': '2025-10-01 04:26:18.185572', 'step': 10860, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:18.221440', 'step': 10860, 'epoch': 2} {'type': 'loss', 'content': 0.11275246739387512, 'timestamp': '2025-10-01 04:26:18.223813', 'step': 10861, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:18.254612', 'step': 10861, 'epoch': 2} {'type': 'loss', 'content': 0.12406366318464279, 'timestamp': '2025-10-01 04:26:18.257006', 'step': 10862, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:26:18.287561', 'step': 10862, 'epoch': 2} {'type': 'loss', 'content': 0.17187345027923584, 'timestamp': '2025-10-01 04:26:18.290001', 'step': 10863, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:18.321642', 'step': 10863, 'epoch': 2} {'type': 'loss', 'content': 0.1316477507352829, 'timestamp': '2025-10-01 04:26:18.345326', 'step': 10864, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:18.375694', 'step': 10864, 'epoch': 2} {'type': 'loss', 'content': 0.10228383541107178, 'timestamp': '2025-10-01 04:26:18.377932', 'step': 10865, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:18.407693', 'step': 10865, 'epoch': 2} {'type': 'loss', 'content': 0.08818813413381577, 'timestamp': '2025-10-01 04:26:18.413450', 'step': 10866, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:26:18.452444', 'step': 10866, 'epoch': 2} {'type': 'loss', 'content': 0.1267014443874359, 'timestamp': '2025-10-01 04:26:18.455154', 'step': 10867, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:18.487054', 'step': 10867, 'epoch': 2} {'type': 'loss', 'content': 0.1400880217552185, 'timestamp': '2025-10-01 04:26:18.510925', 'step': 10868, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:18.546905', 'step': 10868, 'epoch': 2} {'type': 'loss', 'content': 0.07867424190044403, 'timestamp': '2025-10-01 04:26:18.549234', 'step': 10869, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:18.579792', 'step': 10869, 'epoch': 2} {'type': 'loss', 'content': 0.06089486926794052, 'timestamp': '2025-10-01 04:26:18.582631', 'step': 10870, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:18.614318', 'step': 10870, 'epoch': 2} {'type': 'loss', 'content': 0.03418684005737305, 'timestamp': '2025-10-01 04:26:18.616636', 'step': 10871, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:18.646990', 'step': 10871, 'epoch': 2} {'type': 'loss', 'content': 0.15246154367923737, 'timestamp': '2025-10-01 04:26:18.671047', 'step': 10872, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:18.702515', 'step': 10872, 'epoch': 2} {'type': 'loss', 'content': 0.1482275426387787, 'timestamp': '2025-10-01 04:26:18.704683', 'step': 10873, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:18.736319', 'step': 10873, 'epoch': 2} {'type': 'loss', 'content': 0.07354209572076797, 'timestamp': '2025-10-01 04:26:18.739812', 'step': 10874, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:18.771606', 'step': 10874, 'epoch': 2} {'type': 'loss', 'content': 0.11395441740751266, 'timestamp': '2025-10-01 04:26:18.775497', 'step': 10875, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:18.806988', 'step': 10875, 'epoch': 2} {'type': 'loss', 'content': 0.1239934116601944, 'timestamp': '2025-10-01 04:26:18.830792', 'step': 10876, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:18.861716', 'step': 10876, 'epoch': 2} {'type': 'loss', 'content': 0.04026208445429802, 'timestamp': '2025-10-01 04:26:18.868786', 'step': 10877, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:18.900798', 'step': 10877, 'epoch': 2} {'type': 'loss', 'content': 0.09013523161411285, 'timestamp': '2025-10-01 04:26:18.903528', 'step': 10878, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:18.935538', 'step': 10878, 'epoch': 2} {'type': 'loss', 'content': 0.11237050592899323, 'timestamp': '2025-10-01 04:26:18.937868', 'step': 10879, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:18.968777', 'step': 10879, 'epoch': 2} {'type': 'loss', 'content': 0.05475717782974243, 'timestamp': '2025-10-01 04:26:18.992551', 'step': 10880, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:19.023657', 'step': 10880, 'epoch': 2} {'type': 'loss', 'content': 0.12807898223400116, 'timestamp': '2025-10-01 04:26:19.026398', 'step': 10881, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:19.056915', 'step': 10881, 'epoch': 2} {'type': 'loss', 'content': 0.12610305845737457, 'timestamp': '2025-10-01 04:26:19.059139', 'step': 10882, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:26:19.089872', 'step': 10882, 'epoch': 2} {'type': 'loss', 'content': 0.14270780980587006, 'timestamp': '2025-10-01 04:26:19.092238', 'step': 10883, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:19.122723', 'step': 10883, 'epoch': 2} {'type': 'loss', 'content': 0.08876202255487442, 'timestamp': '2025-10-01 04:26:19.146553', 'step': 10884, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:19.178155', 'step': 10884, 'epoch': 2} {'type': 'loss', 'content': 0.04827137291431427, 'timestamp': '2025-10-01 04:26:19.181003', 'step': 10885, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:19.214044', 'step': 10885, 'epoch': 2} {'type': 'loss', 'content': 0.050194818526506424, 'timestamp': '2025-10-01 04:26:19.216288', 'step': 10886, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:19.281325', 'step': 10886, 'epoch': 2} {'type': 'loss', 'content': 0.2037871927022934, 'timestamp': '2025-10-01 04:26:19.283658', 'step': 10887, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:19.314407', 'step': 10887, 'epoch': 2} {'type': 'loss', 'content': 0.189627543091774, 'timestamp': '2025-10-01 04:26:19.338135', 'step': 10888, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:19.368512', 'step': 10888, 'epoch': 2} {'type': 'loss', 'content': 0.09263013303279877, 'timestamp': '2025-10-01 04:26:19.370966', 'step': 10889, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:26:19.402023', 'step': 10889, 'epoch': 2} {'type': 'loss', 'content': 0.15894345939159393, 'timestamp': '2025-10-01 04:26:19.404576', 'step': 10890, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:19.435926', 'step': 10890, 'epoch': 2} {'type': 'loss', 'content': 0.09858261793851852, 'timestamp': '2025-10-01 04:26:19.438451', 'step': 10891, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:19.468742', 'step': 10891, 'epoch': 2} {'type': 'loss', 'content': 0.08406960964202881, 'timestamp': '2025-10-01 04:26:19.492436', 'step': 10892, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:19.522948', 'step': 10892, 'epoch': 2} {'type': 'loss', 'content': 0.039212796837091446, 'timestamp': '2025-10-01 04:26:19.525136', 'step': 10893, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:19.556697', 'step': 10893, 'epoch': 2} {'type': 'loss', 'content': 0.1670571118593216, 'timestamp': '2025-10-01 04:26:19.558770', 'step': 10894, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:19.593188', 'step': 10894, 'epoch': 2} {'type': 'loss', 'content': 0.06624332815408707, 'timestamp': '2025-10-01 04:26:19.595806', 'step': 10895, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:19.626343', 'step': 10895, 'epoch': 2} {'type': 'loss', 'content': 0.19416896998882294, 'timestamp': '2025-10-01 04:26:19.657293', 'step': 10896, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:26:19.688709', 'step': 10896, 'epoch': 2} {'type': 'loss', 'content': 0.06324756145477295, 'timestamp': '2025-10-01 04:26:19.702846', 'step': 10897, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:19.773726', 'step': 10897, 'epoch': 2} {'type': 'loss', 'content': 0.193134605884552, 'timestamp': '2025-10-01 04:26:19.785507', 'step': 10898, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:26:19.818214', 'step': 10898, 'epoch': 2} {'type': 'loss', 'content': 0.09015427529811859, 'timestamp': '2025-10-01 04:26:19.820633', 'step': 10899, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:19.861770', 'step': 10899, 'epoch': 2} {'type': 'loss', 'content': 0.10988086462020874, 'timestamp': '2025-10-01 04:26:19.885457', 'step': 10900, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:26:19.919201', 'step': 10900, 'epoch': 2} {'type': 'loss', 'content': 0.09030181169509888, 'timestamp': '2025-10-01 04:26:19.921380', 'step': 10901, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:19.953044', 'step': 10901, 'epoch': 2} {'type': 'loss', 'content': 0.1571013331413269, 'timestamp': '2025-10-01 04:26:19.955319', 'step': 10902, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:19.995689', 'step': 10902, 'epoch': 2} {'type': 'loss', 'content': 0.12309789657592773, 'timestamp': '2025-10-01 04:26:20.006946', 'step': 10903, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:26:20.037595', 'step': 10903, 'epoch': 2} {'type': 'loss', 'content': 0.07337247580289841, 'timestamp': '2025-10-01 04:26:20.061489', 'step': 10904, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:20.091629', 'step': 10904, 'epoch': 2} {'type': 'loss', 'content': 0.12711256742477417, 'timestamp': '2025-10-01 04:26:20.093960', 'step': 10905, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:20.124384', 'step': 10905, 'epoch': 2} {'type': 'loss', 'content': 0.0977344885468483, 'timestamp': '2025-10-01 04:26:20.126972', 'step': 10906, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:20.157027', 'step': 10906, 'epoch': 2} {'type': 'loss', 'content': 0.08330073952674866, 'timestamp': '2025-10-01 04:26:20.159303', 'step': 10907, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:20.191370', 'step': 10907, 'epoch': 2} {'type': 'loss', 'content': 0.15596719086170197, 'timestamp': '2025-10-01 04:26:20.215006', 'step': 10908, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:26:20.245963', 'step': 10908, 'epoch': 2} {'type': 'loss', 'content': 0.1332862377166748, 'timestamp': '2025-10-01 04:26:20.248264', 'step': 10909, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:26:20.279713', 'step': 10909, 'epoch': 2} {'type': 'loss', 'content': 0.12389884144067764, 'timestamp': '2025-10-01 04:26:20.282770', 'step': 10910, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:20.314158', 'step': 10910, 'epoch': 2} {'type': 'loss', 'content': 0.15490302443504333, 'timestamp': '2025-10-01 04:26:20.318295', 'step': 10911, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:26:20.355646', 'step': 10911, 'epoch': 2} {'type': 'loss', 'content': 0.1663903146982193, 'timestamp': '2025-10-01 04:26:20.379269', 'step': 10912, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:20.410227', 'step': 10912, 'epoch': 2} {'type': 'loss', 'content': 0.09438501298427582, 'timestamp': '2025-10-01 04:26:20.422411', 'step': 10913, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:26:20.465697', 'step': 10913, 'epoch': 2} {'type': 'loss', 'content': 0.13159143924713135, 'timestamp': '2025-10-01 04:26:20.468186', 'step': 10914, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:20.499521', 'step': 10914, 'epoch': 2} {'type': 'loss', 'content': 0.08598557859659195, 'timestamp': '2025-10-01 04:26:20.501570', 'step': 10915, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:20.531718', 'step': 10915, 'epoch': 2} {'type': 'loss', 'content': 0.14101290702819824, 'timestamp': '2025-10-01 04:26:20.563531', 'step': 10916, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:20.597759', 'step': 10916, 'epoch': 2} {'type': 'loss', 'content': 0.039886727929115295, 'timestamp': '2025-10-01 04:26:20.610045', 'step': 10917, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:20.642281', 'step': 10917, 'epoch': 2} {'type': 'loss', 'content': 0.06340432912111282, 'timestamp': '2025-10-01 04:26:20.644796', 'step': 10918, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:20.680286', 'step': 10918, 'epoch': 2} {'type': 'loss', 'content': 0.11482314020395279, 'timestamp': '2025-10-01 04:26:20.684206', 'step': 10919, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:20.714364', 'step': 10919, 'epoch': 2} {'type': 'loss', 'content': 0.12904761731624603, 'timestamp': '2025-10-01 04:26:20.745679', 'step': 10920, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:20.776658', 'step': 10920, 'epoch': 2} {'type': 'loss', 'content': 0.08231087028980255, 'timestamp': '2025-10-01 04:26:20.784581', 'step': 10921, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:20.816695', 'step': 10921, 'epoch': 2} {'type': 'loss', 'content': 0.03719497099518776, 'timestamp': '2025-10-01 04:26:20.820478', 'step': 10922, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:20.855672', 'step': 10922, 'epoch': 2} {'type': 'loss', 'content': 0.1449556052684784, 'timestamp': '2025-10-01 04:26:20.861751', 'step': 10923, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:20.899111', 'step': 10923, 'epoch': 2} {'type': 'loss', 'content': 0.10093995928764343, 'timestamp': '2025-10-01 04:26:20.924524', 'step': 10924, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:20.970024', 'step': 10924, 'epoch': 2} {'type': 'loss', 'content': 0.1709708571434021, 'timestamp': '2025-10-01 04:26:20.972588', 'step': 10925, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:21.002269', 'step': 10925, 'epoch': 2} {'type': 'loss', 'content': 0.09120269864797592, 'timestamp': '2025-10-01 04:26:21.004507', 'step': 10926, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:21.038923', 'step': 10926, 'epoch': 2} {'type': 'loss', 'content': 0.1420791745185852, 'timestamp': '2025-10-01 04:26:21.047314', 'step': 10927, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:21.079850', 'step': 10927, 'epoch': 2} {'type': 'loss', 'content': 0.06683129817247391, 'timestamp': '2025-10-01 04:26:21.106537', 'step': 10928, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:21.137035', 'step': 10928, 'epoch': 2} {'type': 'loss', 'content': 0.10165717452764511, 'timestamp': '2025-10-01 04:26:21.139290', 'step': 10929, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:21.172710', 'step': 10929, 'epoch': 2} {'type': 'loss', 'content': 0.09236790984869003, 'timestamp': '2025-10-01 04:26:21.186049', 'step': 10930, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:26:21.216529', 'step': 10930, 'epoch': 2} {'type': 'loss', 'content': 0.16272175312042236, 'timestamp': '2025-10-01 04:26:21.219483', 'step': 10931, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:21.252011', 'step': 10931, 'epoch': 2} {'type': 'loss', 'content': 0.08953535556793213, 'timestamp': '2025-10-01 04:26:21.276451', 'step': 10932, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:21.327146', 'step': 10932, 'epoch': 2} {'type': 'loss', 'content': 0.07651657611131668, 'timestamp': '2025-10-01 04:26:21.329565', 'step': 10933, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:21.376642', 'step': 10933, 'epoch': 2} {'type': 'loss', 'content': 0.11299171298742294, 'timestamp': '2025-10-01 04:26:21.387468', 'step': 10934, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:26:21.421181', 'step': 10934, 'epoch': 2} {'type': 'loss', 'content': 0.13272130489349365, 'timestamp': '2025-10-01 04:26:21.423774', 'step': 10935, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:21.459474', 'step': 10935, 'epoch': 2} {'type': 'loss', 'content': 0.20747384428977966, 'timestamp': '2025-10-01 04:26:21.483878', 'step': 10936, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:21.515142', 'step': 10936, 'epoch': 2} {'type': 'loss', 'content': 0.0959743782877922, 'timestamp': '2025-10-01 04:26:21.517775', 'step': 10937, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:21.548357', 'step': 10937, 'epoch': 2} {'type': 'loss', 'content': 0.11486499756574631, 'timestamp': '2025-10-01 04:26:21.550612', 'step': 10938, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:21.589229', 'step': 10938, 'epoch': 2} {'type': 'loss', 'content': 0.11317160725593567, 'timestamp': '2025-10-01 04:26:21.593603', 'step': 10939, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:21.626235', 'step': 10939, 'epoch': 2} {'type': 'loss', 'content': 0.1617993861436844, 'timestamp': '2025-10-01 04:26:21.666124', 'step': 10940, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:21.697334', 'step': 10940, 'epoch': 2} {'type': 'loss', 'content': 0.12403976172208786, 'timestamp': '2025-10-01 04:26:21.699895', 'step': 10941, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:21.731157', 'step': 10941, 'epoch': 2} {'type': 'loss', 'content': 0.1933746486902237, 'timestamp': '2025-10-01 04:26:21.745423', 'step': 10942, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:26:21.776759', 'step': 10942, 'epoch': 2} {'type': 'loss', 'content': 0.16759279370307922, 'timestamp': '2025-10-01 04:26:21.780308', 'step': 10943, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:21.817052', 'step': 10943, 'epoch': 2} {'type': 'loss', 'content': 0.11084146052598953, 'timestamp': '2025-10-01 04:26:21.841215', 'step': 10944, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:21.873807', 'step': 10944, 'epoch': 2} {'type': 'loss', 'content': 0.08272681385278702, 'timestamp': '2025-10-01 04:26:21.876695', 'step': 10945, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:21.907791', 'step': 10945, 'epoch': 2} {'type': 'loss', 'content': 0.09006864577531815, 'timestamp': '2025-10-01 04:26:21.910772', 'step': 10946, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:21.941948', 'step': 10946, 'epoch': 2} {'type': 'loss', 'content': 0.1048220843076706, 'timestamp': '2025-10-01 04:26:21.944866', 'step': 10947, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:26:21.975903', 'step': 10947, 'epoch': 2} {'type': 'loss', 'content': 0.08664823323488235, 'timestamp': '2025-10-01 04:26:22.000003', 'step': 10948, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:22.035369', 'step': 10948, 'epoch': 2} {'type': 'loss', 'content': 0.12467899173498154, 'timestamp': '2025-10-01 04:26:22.037921', 'step': 10949, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:26:22.068648', 'step': 10949, 'epoch': 2} {'type': 'loss', 'content': 0.06313492357730865, 'timestamp': '2025-10-01 04:26:22.077450', 'step': 10950, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:22.111070', 'step': 10950, 'epoch': 2} {'type': 'loss', 'content': 0.0919623002409935, 'timestamp': '2025-10-01 04:26:22.117682', 'step': 10951, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:22.151523', 'step': 10951, 'epoch': 2} {'type': 'loss', 'content': 0.10010134428739548, 'timestamp': '2025-10-01 04:26:22.184519', 'step': 10952, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:22.216687', 'step': 10952, 'epoch': 2} {'type': 'loss', 'content': 0.056536994874477386, 'timestamp': '2025-10-01 04:26:22.219503', 'step': 10953, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:22.260552', 'step': 10953, 'epoch': 2} {'type': 'loss', 'content': 0.13412834703922272, 'timestamp': '2025-10-01 04:26:22.263232', 'step': 10954, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:22.293295', 'step': 10954, 'epoch': 2} {'type': 'loss', 'content': 0.10325437039136887, 'timestamp': '2025-10-01 04:26:22.313962', 'step': 10955, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:22.345594', 'step': 10955, 'epoch': 2} {'type': 'loss', 'content': 0.23859882354736328, 'timestamp': '2025-10-01 04:26:22.369739', 'step': 10956, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-10-01 04:26:22.400773', 'step': 10956, 'epoch': 2} {'type': 'loss', 'content': 0.06795474141836166, 'timestamp': '2025-10-01 04:26:22.403426', 'step': 10957, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:22.434237', 'step': 10957, 'epoch': 2} {'type': 'loss', 'content': 0.13325312733650208, 'timestamp': '2025-10-01 04:26:22.436643', 'step': 10958, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:22.467591', 'step': 10958, 'epoch': 2} {'type': 'loss', 'content': 0.1339581161737442, 'timestamp': '2025-10-01 04:26:22.474563', 'step': 10959, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:22.505347', 'step': 10959, 'epoch': 2} {'type': 'loss', 'content': 0.08419110625982285, 'timestamp': '2025-10-01 04:26:22.530810', 'step': 10960, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:22.562154', 'step': 10960, 'epoch': 2} {'type': 'loss', 'content': 0.16380296647548676, 'timestamp': '2025-10-01 04:26:22.574430', 'step': 10961, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:22.605587', 'step': 10961, 'epoch': 2} {'type': 'loss', 'content': 0.09330474585294724, 'timestamp': '2025-10-01 04:26:22.608720', 'step': 10962, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:22.650978', 'step': 10962, 'epoch': 2} {'type': 'loss', 'content': 0.07265091687440872, 'timestamp': '2025-10-01 04:26:22.653521', 'step': 10963, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:22.684082', 'step': 10963, 'epoch': 2} {'type': 'loss', 'content': 0.12699513137340546, 'timestamp': '2025-10-01 04:26:22.708162', 'step': 10964, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:22.738581', 'step': 10964, 'epoch': 2} {'type': 'loss', 'content': 0.09374696016311646, 'timestamp': '2025-10-01 04:26:22.741297', 'step': 10965, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:26:22.772005', 'step': 10965, 'epoch': 2} {'type': 'loss', 'content': 0.0856640562415123, 'timestamp': '2025-10-01 04:26:22.774656', 'step': 10966, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:22.807063', 'step': 10966, 'epoch': 2} {'type': 'loss', 'content': 0.09967876225709915, 'timestamp': '2025-10-01 04:26:22.822291', 'step': 10967, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:22.856140', 'step': 10967, 'epoch': 2} {'type': 'loss', 'content': 0.10632695257663727, 'timestamp': '2025-10-01 04:26:22.880471', 'step': 10968, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:22.912177', 'step': 10968, 'epoch': 2} {'type': 'loss', 'content': 0.08351674675941467, 'timestamp': '2025-10-01 04:26:22.914507', 'step': 10969, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:22.946427', 'step': 10969, 'epoch': 2} {'type': 'loss', 'content': 0.13383622467517853, 'timestamp': '2025-10-01 04:26:22.948938', 'step': 10970, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:22.980381', 'step': 10970, 'epoch': 2} {'type': 'loss', 'content': 0.14818644523620605, 'timestamp': '2025-10-01 04:26:22.984039', 'step': 10971, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:23.015411', 'step': 10971, 'epoch': 2} {'type': 'loss', 'content': 0.13195469975471497, 'timestamp': '2025-10-01 04:26:23.039345', 'step': 10972, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:23.071296', 'step': 10972, 'epoch': 2} {'type': 'loss', 'content': 0.05393528938293457, 'timestamp': '2025-10-01 04:26:23.074239', 'step': 10973, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:23.105668', 'step': 10973, 'epoch': 2} {'type': 'loss', 'content': 0.11626704037189484, 'timestamp': '2025-10-01 04:26:23.108175', 'step': 10974, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:23.138812', 'step': 10974, 'epoch': 2} {'type': 'loss', 'content': 0.19218800961971283, 'timestamp': '2025-10-01 04:26:23.141606', 'step': 10975, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:23.171782', 'step': 10975, 'epoch': 2} {'type': 'loss', 'content': 0.09182318300008774, 'timestamp': '2025-10-01 04:26:23.195564', 'step': 10976, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:23.228543', 'step': 10976, 'epoch': 2} {'type': 'loss', 'content': 0.13053293526172638, 'timestamp': '2025-10-01 04:26:23.231148', 'step': 10977, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:26:23.262664', 'step': 10977, 'epoch': 2} {'type': 'loss', 'content': 0.12043353170156479, 'timestamp': '2025-10-01 04:26:23.270893', 'step': 10978, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:26:23.302348', 'step': 10978, 'epoch': 2} {'type': 'loss', 'content': 0.11155839264392853, 'timestamp': '2025-10-01 04:26:23.305176', 'step': 10979, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:23.337000', 'step': 10979, 'epoch': 2} {'type': 'loss', 'content': 0.1766393482685089, 'timestamp': '2025-10-01 04:26:23.362445', 'step': 10980, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:23.395065', 'step': 10980, 'epoch': 2} {'type': 'loss', 'content': 0.09394808858633041, 'timestamp': '2025-10-01 04:26:23.398302', 'step': 10981, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:23.435225', 'step': 10981, 'epoch': 2} {'type': 'loss', 'content': 0.09905214607715607, 'timestamp': '2025-10-01 04:26:23.440763', 'step': 10982, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:26:23.472023', 'step': 10982, 'epoch': 2} {'type': 'loss', 'content': 0.16018767654895782, 'timestamp': '2025-10-01 04:26:23.477195', 'step': 10983, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:23.507871', 'step': 10983, 'epoch': 2} {'type': 'loss', 'content': 0.07468210160732269, 'timestamp': '2025-10-01 04:26:23.531806', 'step': 10984, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-10-01 04:26:23.564042', 'step': 10984, 'epoch': 2} {'type': 'loss', 'content': 0.07494385540485382, 'timestamp': '2025-10-01 04:26:23.573313', 'step': 10985, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:23.604587', 'step': 10985, 'epoch': 2} {'type': 'loss', 'content': 0.0988042801618576, 'timestamp': '2025-10-01 04:26:23.606933', 'step': 10986, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:23.638687', 'step': 10986, 'epoch': 2} {'type': 'loss', 'content': 0.1850426197052002, 'timestamp': '2025-10-01 04:26:23.641360', 'step': 10987, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:26:23.677441', 'step': 10987, 'epoch': 2} {'type': 'loss', 'content': 0.07935875654220581, 'timestamp': '2025-10-01 04:26:23.701481', 'step': 10988, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:23.745335', 'step': 10988, 'epoch': 2} {'type': 'loss', 'content': 0.13733190298080444, 'timestamp': '2025-10-01 04:26:23.761836', 'step': 10989, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:23.793248', 'step': 10989, 'epoch': 2} {'type': 'loss', 'content': 0.11116685718297958, 'timestamp': '2025-10-01 04:26:23.795563', 'step': 10990, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:26:23.826973', 'step': 10990, 'epoch': 2} {'type': 'loss', 'content': 0.08860592544078827, 'timestamp': '2025-10-01 04:26:23.829404', 'step': 10991, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:23.872783', 'step': 10991, 'epoch': 2} {'type': 'loss', 'content': 0.06351155787706375, 'timestamp': '2025-10-01 04:26:23.896556', 'step': 10992, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:23.935545', 'step': 10992, 'epoch': 2} {'type': 'loss', 'content': 0.15121936798095703, 'timestamp': '2025-10-01 04:26:23.938467', 'step': 10993, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:23.971579', 'step': 10993, 'epoch': 2} {'type': 'loss', 'content': 0.1384267359972, 'timestamp': '2025-10-01 04:26:23.974019', 'step': 10994, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:24.006752', 'step': 10994, 'epoch': 2} {'type': 'loss', 'content': 0.059110529720783234, 'timestamp': '2025-10-01 04:26:24.009086', 'step': 10995, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:24.043316', 'step': 10995, 'epoch': 2} {'type': 'loss', 'content': 0.13094177842140198, 'timestamp': '2025-10-01 04:26:24.066947', 'step': 10996, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:24.096877', 'step': 10996, 'epoch': 2} {'type': 'loss', 'content': 0.1271023154258728, 'timestamp': '2025-10-01 04:26:24.103891', 'step': 10997, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:24.139113', 'step': 10997, 'epoch': 2} {'type': 'loss', 'content': 0.04840358719229698, 'timestamp': '2025-10-01 04:26:24.145064', 'step': 10998, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:24.178591', 'step': 10998, 'epoch': 2} {'type': 'loss', 'content': 0.17447327077388763, 'timestamp': '2025-10-01 04:26:24.180685', 'step': 10999, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:24.215293', 'step': 10999, 'epoch': 2} {'type': 'loss', 'content': 0.15747803449630737, 'timestamp': '2025-10-01 04:26:24.240399', 'step': 11000, 'epoch': 2} {'type': 'info', 'content': 'Checkpoint saved at step 11000', 'timestamp': '2025-10-01 04:26:29.639876', 'step': 11000, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:29.686434', 'step': 11000, 'epoch': 2} {'type': 'loss', 'content': 0.12574760615825653, 'timestamp': '2025-10-01 04:26:29.688991', 'step': 11001, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:29.721135', 'step': 11001, 'epoch': 2} {'type': 'loss', 'content': 0.16373929381370544, 'timestamp': '2025-10-01 04:26:29.723377', 'step': 11002, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:26:29.755179', 'step': 11002, 'epoch': 2} {'type': 'loss', 'content': 0.15250568091869354, 'timestamp': '2025-10-01 04:26:29.757433', 'step': 11003, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:29.788273', 'step': 11003, 'epoch': 2} {'type': 'loss', 'content': 0.07917338609695435, 'timestamp': '2025-10-01 04:26:29.812626', 'step': 11004, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:29.844045', 'step': 11004, 'epoch': 2} {'type': 'loss', 'content': 0.121431365609169, 'timestamp': '2025-10-01 04:26:29.846475', 'step': 11005, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:29.876953', 'step': 11005, 'epoch': 2} {'type': 'loss', 'content': 0.21092025935649872, 'timestamp': '2025-10-01 04:26:29.879569', 'step': 11006, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:26:29.911839', 'step': 11006, 'epoch': 2} {'type': 'loss', 'content': 0.14927925169467926, 'timestamp': '2025-10-01 04:26:29.915721', 'step': 11007, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:26:29.947550', 'step': 11007, 'epoch': 2} {'type': 'loss', 'content': 0.07687253504991531, 'timestamp': '2025-10-01 04:26:29.971309', 'step': 11008, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:30.002700', 'step': 11008, 'epoch': 2} {'type': 'loss', 'content': 0.060870829969644547, 'timestamp': '2025-10-01 04:26:30.004922', 'step': 11009, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:30.036150', 'step': 11009, 'epoch': 2} {'type': 'loss', 'content': 0.12490372359752655, 'timestamp': '2025-10-01 04:26:30.038160', 'step': 11010, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:30.070646', 'step': 11010, 'epoch': 2} {'type': 'loss', 'content': 0.09523904323577881, 'timestamp': '2025-10-01 04:26:30.072888', 'step': 11011, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:30.104005', 'step': 11011, 'epoch': 2} {'type': 'loss', 'content': 0.06055697426199913, 'timestamp': '2025-10-01 04:26:30.127760', 'step': 11012, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:30.159608', 'step': 11012, 'epoch': 2} {'type': 'loss', 'content': 0.05423193424940109, 'timestamp': '2025-10-01 04:26:30.161911', 'step': 11013, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:26:30.192931', 'step': 11013, 'epoch': 2} {'type': 'loss', 'content': 0.07149755954742432, 'timestamp': '2025-10-01 04:26:30.195151', 'step': 11014, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:30.225401', 'step': 11014, 'epoch': 2} {'type': 'loss', 'content': 0.1017199233174324, 'timestamp': '2025-10-01 04:26:30.227653', 'step': 11015, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:30.258322', 'step': 11015, 'epoch': 2} {'type': 'loss', 'content': 0.09583612531423569, 'timestamp': '2025-10-01 04:26:30.281951', 'step': 11016, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:30.312176', 'step': 11016, 'epoch': 2} {'type': 'loss', 'content': 0.11269190907478333, 'timestamp': '2025-10-01 04:26:30.314346', 'step': 11017, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:30.344911', 'step': 11017, 'epoch': 2} {'type': 'loss', 'content': 0.11256352812051773, 'timestamp': '2025-10-01 04:26:30.347569', 'step': 11018, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:30.378505', 'step': 11018, 'epoch': 2} {'type': 'loss', 'content': 0.17177315056324005, 'timestamp': '2025-10-01 04:26:30.380855', 'step': 11019, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:30.412183', 'step': 11019, 'epoch': 2} {'type': 'loss', 'content': 0.03578425943851471, 'timestamp': '2025-10-01 04:26:30.435755', 'step': 11020, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:30.466550', 'step': 11020, 'epoch': 2} {'type': 'loss', 'content': 0.1293897032737732, 'timestamp': '2025-10-01 04:26:30.468779', 'step': 11021, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:26:30.499228', 'step': 11021, 'epoch': 2} {'type': 'loss', 'content': 0.1676870584487915, 'timestamp': '2025-10-01 04:26:30.501534', 'step': 11022, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:26:30.531629', 'step': 11022, 'epoch': 2} {'type': 'loss', 'content': 0.11035328358411789, 'timestamp': '2025-10-01 04:26:30.533947', 'step': 11023, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:30.563978', 'step': 11023, 'epoch': 2} {'type': 'loss', 'content': 0.10244673490524292, 'timestamp': '2025-10-01 04:26:30.587667', 'step': 11024, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:30.618233', 'step': 11024, 'epoch': 2} {'type': 'loss', 'content': 0.07129715383052826, 'timestamp': '2025-10-01 04:26:30.620580', 'step': 11025, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:26:30.650806', 'step': 11025, 'epoch': 2} {'type': 'loss', 'content': 0.1547776758670807, 'timestamp': '2025-10-01 04:26:30.653000', 'step': 11026, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:30.683079', 'step': 11026, 'epoch': 2} {'type': 'loss', 'content': 0.035241562873125076, 'timestamp': '2025-10-01 04:26:30.685510', 'step': 11027, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:30.715804', 'step': 11027, 'epoch': 2} {'type': 'loss', 'content': 0.06496904045343399, 'timestamp': '2025-10-01 04:26:30.739444', 'step': 11028, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:26:30.771515', 'step': 11028, 'epoch': 2} {'type': 'loss', 'content': 0.1222502738237381, 'timestamp': '2025-10-01 04:26:30.774197', 'step': 11029, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:30.804575', 'step': 11029, 'epoch': 2} {'type': 'loss', 'content': 0.1576407253742218, 'timestamp': '2025-10-01 04:26:30.807107', 'step': 11030, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:26:30.838100', 'step': 11030, 'epoch': 2} {'type': 'loss', 'content': 0.09928908199071884, 'timestamp': '2025-10-01 04:26:30.840909', 'step': 11031, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:30.871450', 'step': 11031, 'epoch': 2} {'type': 'loss', 'content': 0.13933569192886353, 'timestamp': '2025-10-01 04:26:30.896099', 'step': 11032, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:26:30.927683', 'step': 11032, 'epoch': 2} {'type': 'loss', 'content': 0.12974396347999573, 'timestamp': '2025-10-01 04:26:30.930304', 'step': 11033, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:30.962875', 'step': 11033, 'epoch': 2} {'type': 'loss', 'content': 0.15280435979366302, 'timestamp': '2025-10-01 04:26:30.965649', 'step': 11034, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:26:31.001505', 'step': 11034, 'epoch': 2} {'type': 'loss', 'content': 0.09739790111780167, 'timestamp': '2025-10-01 04:26:31.006258', 'step': 11035, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:31.050932', 'step': 11035, 'epoch': 2} {'type': 'loss', 'content': 0.16322770714759827, 'timestamp': '2025-10-01 04:26:31.075368', 'step': 11036, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:31.106938', 'step': 11036, 'epoch': 2} {'type': 'loss', 'content': 0.15019559860229492, 'timestamp': '2025-10-01 04:26:31.109594', 'step': 11037, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:31.141435', 'step': 11037, 'epoch': 2} {'type': 'loss', 'content': 0.16527153551578522, 'timestamp': '2025-10-01 04:26:31.144439', 'step': 11038, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:31.175695', 'step': 11038, 'epoch': 2} {'type': 'loss', 'content': 0.06775054335594177, 'timestamp': '2025-10-01 04:26:31.178253', 'step': 11039, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:31.210068', 'step': 11039, 'epoch': 2} {'type': 'loss', 'content': 0.06911782920360565, 'timestamp': '2025-10-01 04:26:31.234026', 'step': 11040, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:31.265482', 'step': 11040, 'epoch': 2} {'type': 'loss', 'content': 0.09391836822032928, 'timestamp': '2025-10-01 04:26:31.267903', 'step': 11041, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:31.299712', 'step': 11041, 'epoch': 2} {'type': 'loss', 'content': 0.08571889251470566, 'timestamp': '2025-10-01 04:26:31.302260', 'step': 11042, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:31.332917', 'step': 11042, 'epoch': 2} {'type': 'loss', 'content': 0.05807919055223465, 'timestamp': '2025-10-01 04:26:31.335331', 'step': 11043, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:26:31.366831', 'step': 11043, 'epoch': 2} {'type': 'loss', 'content': 0.126497283577919, 'timestamp': '2025-10-01 04:26:31.391023', 'step': 11044, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:31.423449', 'step': 11044, 'epoch': 2} {'type': 'loss', 'content': 0.18215155601501465, 'timestamp': '2025-10-01 04:26:31.425879', 'step': 11045, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:31.456697', 'step': 11045, 'epoch': 2} {'type': 'loss', 'content': 0.1986219584941864, 'timestamp': '2025-10-01 04:26:31.459308', 'step': 11046, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:31.490415', 'step': 11046, 'epoch': 2} {'type': 'loss', 'content': 0.0464351586997509, 'timestamp': '2025-10-01 04:26:31.492939', 'step': 11047, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:31.523889', 'step': 11047, 'epoch': 2} {'type': 'loss', 'content': 0.0819651409983635, 'timestamp': '2025-10-01 04:26:31.547842', 'step': 11048, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:31.579133', 'step': 11048, 'epoch': 2} {'type': 'loss', 'content': 0.1316046118736267, 'timestamp': '2025-10-01 04:26:31.581743', 'step': 11049, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:31.612518', 'step': 11049, 'epoch': 2} {'type': 'loss', 'content': 0.16511674225330353, 'timestamp': '2025-10-01 04:26:31.615177', 'step': 11050, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:31.657806', 'step': 11050, 'epoch': 2} {'type': 'loss', 'content': 0.05671503767371178, 'timestamp': '2025-10-01 04:26:31.660466', 'step': 11051, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:31.691891', 'step': 11051, 'epoch': 2} {'type': 'loss', 'content': 0.07963602244853973, 'timestamp': '2025-10-01 04:26:31.715816', 'step': 11052, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:26:31.748170', 'step': 11052, 'epoch': 2} {'type': 'loss', 'content': 0.0834229588508606, 'timestamp': '2025-10-01 04:26:31.750640', 'step': 11053, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:31.786787', 'step': 11053, 'epoch': 2} {'type': 'loss', 'content': 0.1453947126865387, 'timestamp': '2025-10-01 04:26:31.789335', 'step': 11054, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:31.820628', 'step': 11054, 'epoch': 2} {'type': 'loss', 'content': 0.07440029829740524, 'timestamp': '2025-10-01 04:26:31.823412', 'step': 11055, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:31.853915', 'step': 11055, 'epoch': 2} {'type': 'loss', 'content': 0.04234221950173378, 'timestamp': '2025-10-01 04:26:31.878361', 'step': 11056, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:26:31.912994', 'step': 11056, 'epoch': 2} {'type': 'loss', 'content': 0.14729808270931244, 'timestamp': '2025-10-01 04:26:31.917228', 'step': 11057, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:26:31.947620', 'step': 11057, 'epoch': 2} {'type': 'loss', 'content': 0.07941000908613205, 'timestamp': '2025-10-01 04:26:31.950442', 'step': 11058, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:31.983326', 'step': 11058, 'epoch': 2} {'type': 'loss', 'content': 0.07984393835067749, 'timestamp': '2025-10-01 04:26:31.985580', 'step': 11059, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:32.017107', 'step': 11059, 'epoch': 2} {'type': 'loss', 'content': 0.13096924126148224, 'timestamp': '2025-10-01 04:26:32.040871', 'step': 11060, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:32.070947', 'step': 11060, 'epoch': 2} {'type': 'loss', 'content': 0.13085556030273438, 'timestamp': '2025-10-01 04:26:32.073242', 'step': 11061, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:32.103505', 'step': 11061, 'epoch': 2} {'type': 'loss', 'content': 0.043096207082271576, 'timestamp': '2025-10-01 04:26:32.106150', 'step': 11062, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:32.137592', 'step': 11062, 'epoch': 2} {'type': 'loss', 'content': 0.06534963846206665, 'timestamp': '2025-10-01 04:26:32.139878', 'step': 11063, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:32.170507', 'step': 11063, 'epoch': 2} {'type': 'loss', 'content': 0.10264168679714203, 'timestamp': '2025-10-01 04:26:32.194840', 'step': 11064, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:32.226179', 'step': 11064, 'epoch': 2} {'type': 'loss', 'content': 0.04982347786426544, 'timestamp': '2025-10-01 04:26:32.228771', 'step': 11065, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:26:32.259704', 'step': 11065, 'epoch': 2} {'type': 'loss', 'content': 0.07334582507610321, 'timestamp': '2025-10-01 04:26:32.262221', 'step': 11066, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:32.296559', 'step': 11066, 'epoch': 2} {'type': 'loss', 'content': 0.23069556057453156, 'timestamp': '2025-10-01 04:26:32.298817', 'step': 11067, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:32.333278', 'step': 11067, 'epoch': 2} {'type': 'loss', 'content': 0.14277516305446625, 'timestamp': '2025-10-01 04:26:32.356993', 'step': 11068, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:32.391079', 'step': 11068, 'epoch': 2} {'type': 'loss', 'content': 0.09916385263204575, 'timestamp': '2025-10-01 04:26:32.393219', 'step': 11069, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:32.424379', 'step': 11069, 'epoch': 2} {'type': 'loss', 'content': 0.10168632864952087, 'timestamp': '2025-10-01 04:26:32.426596', 'step': 11070, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:32.459904', 'step': 11070, 'epoch': 2} {'type': 'loss', 'content': 0.19590140879154205, 'timestamp': '2025-10-01 04:26:32.462197', 'step': 11071, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:32.497681', 'step': 11071, 'epoch': 2} {'type': 'loss', 'content': 0.13834893703460693, 'timestamp': '2025-10-01 04:26:32.521576', 'step': 11072, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:26:32.553851', 'step': 11072, 'epoch': 2} {'type': 'loss', 'content': 0.10764741897583008, 'timestamp': '2025-10-01 04:26:32.556311', 'step': 11073, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:32.590743', 'step': 11073, 'epoch': 2} {'type': 'loss', 'content': 0.1684587448835373, 'timestamp': '2025-10-01 04:26:32.593398', 'step': 11074, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:26:32.624951', 'step': 11074, 'epoch': 2} {'type': 'loss', 'content': 0.05136599391698837, 'timestamp': '2025-10-01 04:26:32.627571', 'step': 11075, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:32.659060', 'step': 11075, 'epoch': 2} {'type': 'loss', 'content': 0.18685981631278992, 'timestamp': '2025-10-01 04:26:32.683031', 'step': 11076, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:26:32.715424', 'step': 11076, 'epoch': 2} {'type': 'loss', 'content': 0.07879500836133957, 'timestamp': '2025-10-01 04:26:32.717615', 'step': 11077, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:32.748385', 'step': 11077, 'epoch': 2} {'type': 'loss', 'content': 0.10705852508544922, 'timestamp': '2025-10-01 04:26:32.750736', 'step': 11078, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:32.781846', 'step': 11078, 'epoch': 2} {'type': 'loss', 'content': 0.08625276386737823, 'timestamp': '2025-10-01 04:26:32.784099', 'step': 11079, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:32.817050', 'step': 11079, 'epoch': 2} {'type': 'loss', 'content': 0.169469952583313, 'timestamp': '2025-10-01 04:26:32.840741', 'step': 11080, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:32.871746', 'step': 11080, 'epoch': 2} {'type': 'loss', 'content': 0.13709086179733276, 'timestamp': '2025-10-01 04:26:32.874009', 'step': 11081, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:26:32.905075', 'step': 11081, 'epoch': 2} {'type': 'loss', 'content': 0.06782760471105576, 'timestamp': '2025-10-01 04:26:32.907995', 'step': 11082, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:32.941383', 'step': 11082, 'epoch': 2} {'type': 'loss', 'content': 0.13627083599567413, 'timestamp': '2025-10-01 04:26:32.943834', 'step': 11083, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:32.974597', 'step': 11083, 'epoch': 2} {'type': 'loss', 'content': 0.14268964529037476, 'timestamp': '2025-10-01 04:26:32.998316', 'step': 11084, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:33.029982', 'step': 11084, 'epoch': 2} {'type': 'loss', 'content': 0.12057681381702423, 'timestamp': '2025-10-01 04:26:33.032166', 'step': 11085, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:33.064448', 'step': 11085, 'epoch': 2} {'type': 'loss', 'content': 0.032240089029073715, 'timestamp': '2025-10-01 04:26:33.066888', 'step': 11086, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:33.106418', 'step': 11086, 'epoch': 2} {'type': 'loss', 'content': 0.06099311262369156, 'timestamp': '2025-10-01 04:26:33.108737', 'step': 11087, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:33.140280', 'step': 11087, 'epoch': 2} {'type': 'loss', 'content': 0.04485737904906273, 'timestamp': '2025-10-01 04:26:33.163958', 'step': 11088, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:33.197343', 'step': 11088, 'epoch': 2} {'type': 'loss', 'content': 0.15287600457668304, 'timestamp': '2025-10-01 04:26:33.199485', 'step': 11089, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:33.231317', 'step': 11089, 'epoch': 2} {'type': 'loss', 'content': 0.10986838489770889, 'timestamp': '2025-10-01 04:26:33.237994', 'step': 11090, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:33.269794', 'step': 11090, 'epoch': 2} {'type': 'loss', 'content': 0.1422656774520874, 'timestamp': '2025-10-01 04:26:33.272244', 'step': 11091, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:33.307555', 'step': 11091, 'epoch': 2} {'type': 'loss', 'content': 0.17607739567756653, 'timestamp': '2025-10-01 04:26:33.331544', 'step': 11092, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:33.362797', 'step': 11092, 'epoch': 2} {'type': 'loss', 'content': 0.16974236071109772, 'timestamp': '2025-10-01 04:26:33.365021', 'step': 11093, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:33.398888', 'step': 11093, 'epoch': 2} {'type': 'loss', 'content': 0.1106613278388977, 'timestamp': '2025-10-01 04:26:33.402009', 'step': 11094, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:26:33.433503', 'step': 11094, 'epoch': 2} {'type': 'loss', 'content': 0.06512697786092758, 'timestamp': '2025-10-01 04:26:33.435782', 'step': 11095, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:33.466989', 'step': 11095, 'epoch': 2} {'type': 'loss', 'content': 0.12885499000549316, 'timestamp': '2025-10-01 04:26:33.490727', 'step': 11096, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:26:33.524011', 'step': 11096, 'epoch': 2} {'type': 'loss', 'content': 0.12067815661430359, 'timestamp': '2025-10-01 04:26:33.526535', 'step': 11097, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:33.558573', 'step': 11097, 'epoch': 2} {'type': 'loss', 'content': 0.11913439631462097, 'timestamp': '2025-10-01 04:26:33.560787', 'step': 11098, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:33.599402', 'step': 11098, 'epoch': 2} {'type': 'loss', 'content': 0.1682584434747696, 'timestamp': '2025-10-01 04:26:33.601646', 'step': 11099, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:33.633581', 'step': 11099, 'epoch': 2} {'type': 'loss', 'content': 0.15130692720413208, 'timestamp': '2025-10-01 04:26:33.657366', 'step': 11100, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:26:33.691025', 'step': 11100, 'epoch': 2} {'type': 'loss', 'content': 0.14773684740066528, 'timestamp': '2025-10-01 04:26:33.693282', 'step': 11101, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:33.726458', 'step': 11101, 'epoch': 2} {'type': 'loss', 'content': 0.11115887016057968, 'timestamp': '2025-10-01 04:26:33.728667', 'step': 11102, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:33.762107', 'step': 11102, 'epoch': 2} {'type': 'loss', 'content': 0.0741371363401413, 'timestamp': '2025-10-01 04:26:33.764307', 'step': 11103, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:33.796224', 'step': 11103, 'epoch': 2} {'type': 'loss', 'content': 0.08487286418676376, 'timestamp': '2025-10-01 04:26:33.820159', 'step': 11104, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:26:33.854551', 'step': 11104, 'epoch': 2} {'type': 'loss', 'content': 0.09348977357149124, 'timestamp': '2025-10-01 04:26:33.856883', 'step': 11105, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:26:33.891501', 'step': 11105, 'epoch': 2} {'type': 'loss', 'content': 0.09009003639221191, 'timestamp': '2025-10-01 04:26:33.894077', 'step': 11106, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:33.926709', 'step': 11106, 'epoch': 2} {'type': 'loss', 'content': 0.05338931456208229, 'timestamp': '2025-10-01 04:26:33.928805', 'step': 11107, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:33.962068', 'step': 11107, 'epoch': 2} {'type': 'loss', 'content': 0.19463849067687988, 'timestamp': '2025-10-01 04:26:33.986484', 'step': 11108, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:34.019302', 'step': 11108, 'epoch': 2} {'type': 'loss', 'content': 0.11204922199249268, 'timestamp': '2025-10-01 04:26:34.021951', 'step': 11109, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:26:34.055240', 'step': 11109, 'epoch': 2} {'type': 'loss', 'content': 0.22190165519714355, 'timestamp': '2025-10-01 04:26:34.057578', 'step': 11110, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:34.090572', 'step': 11110, 'epoch': 2} {'type': 'loss', 'content': 0.13995322585105896, 'timestamp': '2025-10-01 04:26:34.093512', 'step': 11111, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 10441544708224}, 'timestamp': '2025-10-01 04:26:34.129801', 'step': 11111, 'epoch': 2} {'type': 'loss', 'content': 0.08016378432512283, 'timestamp': '2025-10-01 04:26:34.164569', 'step': 11112, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:34.196486', 'step': 11112, 'epoch': 2} {'type': 'loss', 'content': 0.07713231444358826, 'timestamp': '2025-10-01 04:26:34.199005', 'step': 11113, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:34.229467', 'step': 11113, 'epoch': 2} {'type': 'loss', 'content': 0.13253989815711975, 'timestamp': '2025-10-01 04:26:34.234333', 'step': 11114, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:34.264994', 'step': 11114, 'epoch': 2} {'type': 'loss', 'content': 0.07111245393753052, 'timestamp': '2025-10-01 04:26:34.267324', 'step': 11115, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:34.297948', 'step': 11115, 'epoch': 2} {'type': 'loss', 'content': 0.11690016090869904, 'timestamp': '2025-10-01 04:26:34.325118', 'step': 11116, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:34.355470', 'step': 11116, 'epoch': 2} {'type': 'loss', 'content': 0.15068139135837555, 'timestamp': '2025-10-01 04:26:34.357697', 'step': 11117, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:34.388307', 'step': 11117, 'epoch': 2} {'type': 'loss', 'content': 0.06870206445455551, 'timestamp': '2025-10-01 04:26:34.390535', 'step': 11118, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:34.421310', 'step': 11118, 'epoch': 2} {'type': 'loss', 'content': 0.09317687898874283, 'timestamp': '2025-10-01 04:26:34.423756', 'step': 11119, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:26:34.455322', 'step': 11119, 'epoch': 2} {'type': 'loss', 'content': 0.21815688908100128, 'timestamp': '2025-10-01 04:26:34.479297', 'step': 11120, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:34.510418', 'step': 11120, 'epoch': 2} {'type': 'loss', 'content': 0.10708213597536087, 'timestamp': '2025-10-01 04:26:34.512700', 'step': 11121, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:26:34.548315', 'step': 11121, 'epoch': 2} {'type': 'loss', 'content': 0.1215708926320076, 'timestamp': '2025-10-01 04:26:34.550565', 'step': 11122, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:34.581405', 'step': 11122, 'epoch': 2} {'type': 'loss', 'content': 0.20504137873649597, 'timestamp': '2025-10-01 04:26:34.583985', 'step': 11123, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:34.617008', 'step': 11123, 'epoch': 2} {'type': 'loss', 'content': 0.09563682973384857, 'timestamp': '2025-10-01 04:26:34.640816', 'step': 11124, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:26:34.675613', 'step': 11124, 'epoch': 2} {'type': 'loss', 'content': 0.08742547780275345, 'timestamp': '2025-10-01 04:26:34.678011', 'step': 11125, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:34.709652', 'step': 11125, 'epoch': 2} {'type': 'loss', 'content': 0.1292501538991928, 'timestamp': '2025-10-01 04:26:34.711979', 'step': 11126, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:34.742954', 'step': 11126, 'epoch': 2} {'type': 'loss', 'content': 0.12571799755096436, 'timestamp': '2025-10-01 04:26:34.745247', 'step': 11127, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:34.776605', 'step': 11127, 'epoch': 2} {'type': 'loss', 'content': 0.0826491117477417, 'timestamp': '2025-10-01 04:26:34.800526', 'step': 11128, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:26:34.831818', 'step': 11128, 'epoch': 2} {'type': 'loss', 'content': 0.05580199509859085, 'timestamp': '2025-10-01 04:26:34.834098', 'step': 11129, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:34.864922', 'step': 11129, 'epoch': 2} {'type': 'loss', 'content': 0.11015254259109497, 'timestamp': '2025-10-01 04:26:34.867232', 'step': 11130, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:34.898921', 'step': 11130, 'epoch': 2} {'type': 'loss', 'content': 0.07681185007095337, 'timestamp': '2025-10-01 04:26:34.901171', 'step': 11131, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:26:34.932409', 'step': 11131, 'epoch': 2} {'type': 'loss', 'content': 0.14654111862182617, 'timestamp': '2025-10-01 04:26:34.956388', 'step': 11132, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:34.987607', 'step': 11132, 'epoch': 2} {'type': 'loss', 'content': 0.07089829444885254, 'timestamp': '2025-10-01 04:26:34.989993', 'step': 11133, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:35.020885', 'step': 11133, 'epoch': 2} {'type': 'loss', 'content': 0.10992831736803055, 'timestamp': '2025-10-01 04:26:35.023378', 'step': 11134, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:35.054682', 'step': 11134, 'epoch': 2} {'type': 'loss', 'content': 0.18030445277690887, 'timestamp': '2025-10-01 04:26:35.057277', 'step': 11135, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:35.088861', 'step': 11135, 'epoch': 2} {'type': 'loss', 'content': 0.04597797617316246, 'timestamp': '2025-10-01 04:26:35.112843', 'step': 11136, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:35.144467', 'step': 11136, 'epoch': 2} {'type': 'loss', 'content': 0.1235620528459549, 'timestamp': '2025-10-01 04:26:35.146758', 'step': 11137, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:35.177624', 'step': 11137, 'epoch': 2} {'type': 'loss', 'content': 0.08576911687850952, 'timestamp': '2025-10-01 04:26:35.180069', 'step': 11138, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:35.216394', 'step': 11138, 'epoch': 2} {'type': 'loss', 'content': 0.11593228578567505, 'timestamp': '2025-10-01 04:26:35.218774', 'step': 11139, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:35.250290', 'step': 11139, 'epoch': 2} {'type': 'loss', 'content': 0.16999579966068268, 'timestamp': '2025-10-01 04:26:35.274302', 'step': 11140, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-10-01 04:26:35.306532', 'step': 11140, 'epoch': 2} {'type': 'loss', 'content': 0.12328816205263138, 'timestamp': '2025-10-01 04:26:35.308929', 'step': 11141, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:35.339921', 'step': 11141, 'epoch': 2} {'type': 'loss', 'content': 0.06342537701129913, 'timestamp': '2025-10-01 04:26:35.350074', 'step': 11142, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:35.387967', 'step': 11142, 'epoch': 2} {'type': 'loss', 'content': 0.1525345891714096, 'timestamp': '2025-10-01 04:26:35.391884', 'step': 11143, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:35.426119', 'step': 11143, 'epoch': 2} {'type': 'loss', 'content': 0.09669976681470871, 'timestamp': '2025-10-01 04:26:35.452897', 'step': 11144, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:35.486038', 'step': 11144, 'epoch': 2} {'type': 'loss', 'content': 0.14584538340568542, 'timestamp': '2025-10-01 04:26:35.488351', 'step': 11145, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:35.520684', 'step': 11145, 'epoch': 2} {'type': 'loss', 'content': 0.2544129192829132, 'timestamp': '2025-10-01 04:26:35.522969', 'step': 11146, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:35.554712', 'step': 11146, 'epoch': 2} {'type': 'loss', 'content': 0.10022180527448654, 'timestamp': '2025-10-01 04:26:35.557140', 'step': 11147, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:35.594498', 'step': 11147, 'epoch': 2} {'type': 'loss', 'content': 0.11844691634178162, 'timestamp': '2025-10-01 04:26:35.627639', 'step': 11148, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:26:35.664062', 'step': 11148, 'epoch': 2} {'type': 'loss', 'content': 0.12329792231321335, 'timestamp': '2025-10-01 04:26:35.666607', 'step': 11149, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:35.699715', 'step': 11149, 'epoch': 2} {'type': 'loss', 'content': 0.14549899101257324, 'timestamp': '2025-10-01 04:26:35.702020', 'step': 11150, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:35.734016', 'step': 11150, 'epoch': 2} {'type': 'loss', 'content': 0.07832881808280945, 'timestamp': '2025-10-01 04:26:35.736376', 'step': 11151, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:35.767592', 'step': 11151, 'epoch': 2} {'type': 'loss', 'content': 0.08806207776069641, 'timestamp': '2025-10-01 04:26:35.791618', 'step': 11152, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:35.823410', 'step': 11152, 'epoch': 2} {'type': 'loss', 'content': 0.09835957735776901, 'timestamp': '2025-10-01 04:26:35.825734', 'step': 11153, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:35.856681', 'step': 11153, 'epoch': 2} {'type': 'loss', 'content': 0.18786220252513885, 'timestamp': '2025-10-01 04:26:35.858906', 'step': 11154, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:35.893856', 'step': 11154, 'epoch': 2} {'type': 'loss', 'content': 0.23058997094631195, 'timestamp': '2025-10-01 04:26:35.896121', 'step': 11155, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:35.929180', 'step': 11155, 'epoch': 2} {'type': 'loss', 'content': 0.05679960921406746, 'timestamp': '2025-10-01 04:26:35.953614', 'step': 11156, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:26:35.987074', 'step': 11156, 'epoch': 2} {'type': 'loss', 'content': 0.034471187740564346, 'timestamp': '2025-10-01 04:26:35.989490', 'step': 11157, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:36.021923', 'step': 11157, 'epoch': 2} {'type': 'loss', 'content': 0.0899820551276207, 'timestamp': '2025-10-01 04:26:36.024194', 'step': 11158, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:36.061604', 'step': 11158, 'epoch': 2} {'type': 'loss', 'content': 0.056104786694049835, 'timestamp': '2025-10-01 04:26:36.064044', 'step': 11159, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:26:36.095915', 'step': 11159, 'epoch': 2} {'type': 'loss', 'content': 0.05239352211356163, 'timestamp': '2025-10-01 04:26:36.120007', 'step': 11160, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:36.151851', 'step': 11160, 'epoch': 2} {'type': 'loss', 'content': 0.16468314826488495, 'timestamp': '2025-10-01 04:26:36.154220', 'step': 11161, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:36.186583', 'step': 11161, 'epoch': 2} {'type': 'loss', 'content': 0.12175245583057404, 'timestamp': '2025-10-01 04:26:36.189026', 'step': 11162, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:26:36.220691', 'step': 11162, 'epoch': 2} {'type': 'loss', 'content': 0.1038743108510971, 'timestamp': '2025-10-01 04:26:36.223394', 'step': 11163, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:36.257329', 'step': 11163, 'epoch': 2} {'type': 'loss', 'content': 0.13817250728607178, 'timestamp': '2025-10-01 04:26:36.292292', 'step': 11164, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:36.323023', 'step': 11164, 'epoch': 2} {'type': 'loss', 'content': 0.12214197218418121, 'timestamp': '2025-10-01 04:26:36.325391', 'step': 11165, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:36.355599', 'step': 11165, 'epoch': 2} {'type': 'loss', 'content': 0.07576719671487808, 'timestamp': '2025-10-01 04:26:36.357879', 'step': 11166, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:36.387969', 'step': 11166, 'epoch': 2} {'type': 'loss', 'content': 0.18237601220607758, 'timestamp': '2025-10-01 04:26:36.390363', 'step': 11167, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:36.420796', 'step': 11167, 'epoch': 2} {'type': 'loss', 'content': 0.13936029374599457, 'timestamp': '2025-10-01 04:26:36.444576', 'step': 11168, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:36.475082', 'step': 11168, 'epoch': 2} {'type': 'loss', 'content': 0.10773903876543045, 'timestamp': '2025-10-01 04:26:36.477442', 'step': 11169, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:26:36.521273', 'step': 11169, 'epoch': 2} {'type': 'loss', 'content': 0.05177666246891022, 'timestamp': '2025-10-01 04:26:36.523589', 'step': 11170, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:36.564377', 'step': 11170, 'epoch': 2} {'type': 'loss', 'content': 0.1256542056798935, 'timestamp': '2025-10-01 04:26:36.566766', 'step': 11171, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:36.597474', 'step': 11171, 'epoch': 2} {'type': 'loss', 'content': 0.07350389659404755, 'timestamp': '2025-10-01 04:26:36.621399', 'step': 11172, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:36.651627', 'step': 11172, 'epoch': 2} {'type': 'loss', 'content': 0.16097506880760193, 'timestamp': '2025-10-01 04:26:36.653832', 'step': 11173, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:36.684013', 'step': 11173, 'epoch': 2} {'type': 'loss', 'content': 0.1795499175786972, 'timestamp': '2025-10-01 04:26:36.697551', 'step': 11174, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:26:36.727714', 'step': 11174, 'epoch': 2} {'type': 'loss', 'content': 0.17128397524356842, 'timestamp': '2025-10-01 04:26:36.730057', 'step': 11175, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:36.760537', 'step': 11175, 'epoch': 2} {'type': 'loss', 'content': 0.19980551302433014, 'timestamp': '2025-10-01 04:26:36.793985', 'step': 11176, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:36.825980', 'step': 11176, 'epoch': 2} {'type': 'loss', 'content': 0.07340766489505768, 'timestamp': '2025-10-01 04:26:36.829227', 'step': 11177, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:36.859800', 'step': 11177, 'epoch': 2} {'type': 'loss', 'content': 0.07656463980674744, 'timestamp': '2025-10-01 04:26:36.862357', 'step': 11178, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:36.892922', 'step': 11178, 'epoch': 2} {'type': 'loss', 'content': 0.12502281367778778, 'timestamp': '2025-10-01 04:26:36.895079', 'step': 11179, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:36.926180', 'step': 11179, 'epoch': 2} {'type': 'loss', 'content': 0.09771016240119934, 'timestamp': '2025-10-01 04:26:36.950003', 'step': 11180, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:26:36.980754', 'step': 11180, 'epoch': 2} {'type': 'loss', 'content': 0.13327081501483917, 'timestamp': '2025-10-01 04:26:36.983139', 'step': 11181, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:37.014399', 'step': 11181, 'epoch': 2} {'type': 'loss', 'content': 0.06886915117502213, 'timestamp': '2025-10-01 04:26:37.016738', 'step': 11182, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:37.048139', 'step': 11182, 'epoch': 2} {'type': 'loss', 'content': 0.16394393146038055, 'timestamp': '2025-10-01 04:26:37.050397', 'step': 11183, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:37.081190', 'step': 11183, 'epoch': 2} {'type': 'loss', 'content': 0.11318729817867279, 'timestamp': '2025-10-01 04:26:37.104995', 'step': 11184, 'epoch': 2} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 949202279808}], 'timestamp': '2025-10-01 04:26:45.327740', 'step': 11184, 'epoch': 2} {'type': 'pplx', 'content': 11323.896422601929, 'timestamp': '2025-10-01 04:26:45.392423', 'step': 11184, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:45.422661', 'step': 11184, 'epoch': 2} {'type': 'loss', 'content': 0.12015140056610107, 'timestamp': '2025-10-01 04:26:45.425260', 'step': 11185, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:45.456269', 'step': 11185, 'epoch': 2} {'type': 'loss', 'content': 0.11822488158941269, 'timestamp': '2025-10-01 04:26:45.458712', 'step': 11186, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:45.490152', 'step': 11186, 'epoch': 2} {'type': 'loss', 'content': 0.1318720430135727, 'timestamp': '2025-10-01 04:26:45.492695', 'step': 11187, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:45.524453', 'step': 11187, 'epoch': 2} {'type': 'loss', 'content': 0.10444501042366028, 'timestamp': '2025-10-01 04:26:45.548371', 'step': 11188, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:26:45.578866', 'step': 11188, 'epoch': 2} {'type': 'loss', 'content': 0.05998540669679642, 'timestamp': '2025-10-01 04:26:45.581234', 'step': 11189, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:45.612631', 'step': 11189, 'epoch': 2} {'type': 'loss', 'content': 0.1606677621603012, 'timestamp': '2025-10-01 04:26:45.615083', 'step': 11190, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:45.645778', 'step': 11190, 'epoch': 2} {'type': 'loss', 'content': 0.11904416978359222, 'timestamp': '2025-10-01 04:26:45.648094', 'step': 11191, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:45.678632', 'step': 11191, 'epoch': 2} {'type': 'loss', 'content': 0.19977527856826782, 'timestamp': '2025-10-01 04:26:45.714367', 'step': 11192, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:45.745944', 'step': 11192, 'epoch': 2} {'type': 'loss', 'content': 0.14602996408939362, 'timestamp': '2025-10-01 04:26:45.749237', 'step': 11193, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:45.781303', 'step': 11193, 'epoch': 2} {'type': 'loss', 'content': 0.09523987025022507, 'timestamp': '2025-10-01 04:26:45.783682', 'step': 11194, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:26:45.816342', 'step': 11194, 'epoch': 2} {'type': 'loss', 'content': 0.09151937067508698, 'timestamp': '2025-10-01 04:26:45.818990', 'step': 11195, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:45.849854', 'step': 11195, 'epoch': 2} {'type': 'loss', 'content': 0.13433104753494263, 'timestamp': '2025-10-01 04:26:45.873936', 'step': 11196, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:45.906668', 'step': 11196, 'epoch': 2} {'type': 'loss', 'content': 0.09046124666929245, 'timestamp': '2025-10-01 04:26:45.909111', 'step': 11197, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:45.940283', 'step': 11197, 'epoch': 2} {'type': 'loss', 'content': 0.02533303201198578, 'timestamp': '2025-10-01 04:26:45.943064', 'step': 11198, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:26:45.974299', 'step': 11198, 'epoch': 2} {'type': 'loss', 'content': 0.1227831318974495, 'timestamp': '2025-10-01 04:26:45.976779', 'step': 11199, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:46.008161', 'step': 11199, 'epoch': 2} {'type': 'loss', 'content': 0.11206474155187607, 'timestamp': '2025-10-01 04:26:46.032139', 'step': 11200, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:46.063677', 'step': 11200, 'epoch': 2} {'type': 'loss', 'content': 0.15964660048484802, 'timestamp': '2025-10-01 04:26:46.066087', 'step': 11201, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:46.097395', 'step': 11201, 'epoch': 2} {'type': 'loss', 'content': 0.045767176896333694, 'timestamp': '2025-10-01 04:26:46.099809', 'step': 11202, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:46.130784', 'step': 11202, 'epoch': 2} {'type': 'loss', 'content': 0.11971092224121094, 'timestamp': '2025-10-01 04:26:46.133050', 'step': 11203, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:46.164499', 'step': 11203, 'epoch': 2} {'type': 'loss', 'content': 0.15135188400745392, 'timestamp': '2025-10-01 04:26:46.188424', 'step': 11204, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:46.219227', 'step': 11204, 'epoch': 2} {'type': 'loss', 'content': 0.11553644388914108, 'timestamp': '2025-10-01 04:26:46.222253', 'step': 11205, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:46.253180', 'step': 11205, 'epoch': 2} {'type': 'loss', 'content': 0.09016440063714981, 'timestamp': '2025-10-01 04:26:46.255496', 'step': 11206, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:46.289684', 'step': 11206, 'epoch': 2} {'type': 'loss', 'content': 0.10757129639387131, 'timestamp': '2025-10-01 04:26:46.292198', 'step': 11207, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:46.323718', 'step': 11207, 'epoch': 2} {'type': 'loss', 'content': 0.10803334414958954, 'timestamp': '2025-10-01 04:26:46.347533', 'step': 11208, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:46.379879', 'step': 11208, 'epoch': 2} {'type': 'loss', 'content': 0.1443406045436859, 'timestamp': '2025-10-01 04:26:46.382282', 'step': 11209, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:46.414215', 'step': 11209, 'epoch': 2} {'type': 'loss', 'content': 0.11460736393928528, 'timestamp': '2025-10-01 04:26:46.416575', 'step': 11210, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:46.447933', 'step': 11210, 'epoch': 2} {'type': 'loss', 'content': 0.12858964502811432, 'timestamp': '2025-10-01 04:26:46.450465', 'step': 11211, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:46.481927', 'step': 11211, 'epoch': 2} {'type': 'loss', 'content': 0.06268920004367828, 'timestamp': '2025-10-01 04:26:46.505492', 'step': 11212, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:46.537411', 'step': 11212, 'epoch': 2} {'type': 'loss', 'content': 0.09735716134309769, 'timestamp': '2025-10-01 04:26:46.539780', 'step': 11213, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:46.570074', 'step': 11213, 'epoch': 2} {'type': 'loss', 'content': 0.08631306886672974, 'timestamp': '2025-10-01 04:26:46.579847', 'step': 11214, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:26:46.610895', 'step': 11214, 'epoch': 2} {'type': 'loss', 'content': 0.0874861627817154, 'timestamp': '2025-10-01 04:26:46.613993', 'step': 11215, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:46.644669', 'step': 11215, 'epoch': 2} {'type': 'loss', 'content': 0.093726746737957, 'timestamp': '2025-10-01 04:26:46.668775', 'step': 11216, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:46.699242', 'step': 11216, 'epoch': 2} {'type': 'loss', 'content': 0.08164377510547638, 'timestamp': '2025-10-01 04:26:46.701763', 'step': 11217, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-10-01 04:26:46.733266', 'step': 11217, 'epoch': 2} {'type': 'loss', 'content': 0.20306560397148132, 'timestamp': '2025-10-01 04:26:46.737898', 'step': 11218, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:46.769718', 'step': 11218, 'epoch': 2} {'type': 'loss', 'content': 0.06228203326463699, 'timestamp': '2025-10-01 04:26:46.772034', 'step': 11219, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:46.803523', 'step': 11219, 'epoch': 2} {'type': 'loss', 'content': 0.05007117614150047, 'timestamp': '2025-10-01 04:26:46.827353', 'step': 11220, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:46.857824', 'step': 11220, 'epoch': 2} {'type': 'loss', 'content': 0.05830845236778259, 'timestamp': '2025-10-01 04:26:46.860368', 'step': 11221, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:26:46.891887', 'step': 11221, 'epoch': 2} {'type': 'loss', 'content': 0.07539871335029602, 'timestamp': '2025-10-01 04:26:46.894961', 'step': 11222, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:46.926898', 'step': 11222, 'epoch': 2} {'type': 'loss', 'content': 0.10291256755590439, 'timestamp': '2025-10-01 04:26:46.928979', 'step': 11223, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:46.963244', 'step': 11223, 'epoch': 2} {'type': 'loss', 'content': 0.10059255361557007, 'timestamp': '2025-10-01 04:26:46.987645', 'step': 11224, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:26:47.018378', 'step': 11224, 'epoch': 2} {'type': 'loss', 'content': 0.030938848853111267, 'timestamp': '2025-10-01 04:26:47.021050', 'step': 11225, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:47.051077', 'step': 11225, 'epoch': 2} {'type': 'loss', 'content': 0.06052118167281151, 'timestamp': '2025-10-01 04:26:47.056728', 'step': 11226, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:47.088952', 'step': 11226, 'epoch': 2} {'type': 'loss', 'content': 0.06887023150920868, 'timestamp': '2025-10-01 04:26:47.091458', 'step': 11227, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:47.122005', 'step': 11227, 'epoch': 2} {'type': 'loss', 'content': 0.11171463131904602, 'timestamp': '2025-10-01 04:26:47.145869', 'step': 11228, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:47.176683', 'step': 11228, 'epoch': 2} {'type': 'loss', 'content': 0.13608691096305847, 'timestamp': '2025-10-01 04:26:47.179192', 'step': 11229, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:26:47.211630', 'step': 11229, 'epoch': 2} {'type': 'loss', 'content': 0.09619998186826706, 'timestamp': '2025-10-01 04:26:47.214269', 'step': 11230, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:47.246830', 'step': 11230, 'epoch': 2} {'type': 'loss', 'content': 0.05000194162130356, 'timestamp': '2025-10-01 04:26:47.268227', 'step': 11231, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:47.299579', 'step': 11231, 'epoch': 2} {'type': 'loss', 'content': 0.11283659189939499, 'timestamp': '2025-10-01 04:26:47.352007', 'step': 11232, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:47.388548', 'step': 11232, 'epoch': 2} {'type': 'loss', 'content': 0.08886425197124481, 'timestamp': '2025-10-01 04:26:47.400374', 'step': 11233, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:47.441456', 'step': 11233, 'epoch': 2} {'type': 'loss', 'content': 0.164315864443779, 'timestamp': '2025-10-01 04:26:47.458274', 'step': 11234, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:47.491884', 'step': 11234, 'epoch': 2} {'type': 'loss', 'content': 0.026039766147732735, 'timestamp': '2025-10-01 04:26:47.494908', 'step': 11235, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:47.539631', 'step': 11235, 'epoch': 2} {'type': 'loss', 'content': 0.0392148494720459, 'timestamp': '2025-10-01 04:26:47.574751', 'step': 11236, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:47.607739', 'step': 11236, 'epoch': 2} {'type': 'loss', 'content': 0.09766210615634918, 'timestamp': '2025-10-01 04:26:47.615364', 'step': 11237, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:47.665394', 'step': 11237, 'epoch': 2} {'type': 'loss', 'content': 0.10252424329519272, 'timestamp': '2025-10-01 04:26:47.668935', 'step': 11238, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:47.704735', 'step': 11238, 'epoch': 2} {'type': 'loss', 'content': 0.08161702007055283, 'timestamp': '2025-10-01 04:26:47.721285', 'step': 11239, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:47.771430', 'step': 11239, 'epoch': 2} {'type': 'loss', 'content': 0.10868953913450241, 'timestamp': '2025-10-01 04:26:47.801965', 'step': 11240, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:47.844140', 'step': 11240, 'epoch': 2} {'type': 'loss', 'content': 0.10605667531490326, 'timestamp': '2025-10-01 04:26:47.849356', 'step': 11241, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:47.882414', 'step': 11241, 'epoch': 2} {'type': 'loss', 'content': 0.06337925046682358, 'timestamp': '2025-10-01 04:26:47.894583', 'step': 11242, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:47.939680', 'step': 11242, 'epoch': 2} {'type': 'loss', 'content': 0.10171448439359665, 'timestamp': '2025-10-01 04:26:47.955222', 'step': 11243, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:47.993038', 'step': 11243, 'epoch': 2} {'type': 'loss', 'content': 0.08096800744533539, 'timestamp': '2025-10-01 04:26:48.022740', 'step': 11244, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:48.058016', 'step': 11244, 'epoch': 2} {'type': 'loss', 'content': 0.1282964050769806, 'timestamp': '2025-10-01 04:26:48.070565', 'step': 11245, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:48.105832', 'step': 11245, 'epoch': 2} {'type': 'loss', 'content': 0.04490602761507034, 'timestamp': '2025-10-01 04:26:48.109480', 'step': 11246, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:48.145120', 'step': 11246, 'epoch': 2} {'type': 'loss', 'content': 0.119168721139431, 'timestamp': '2025-10-01 04:26:48.147784', 'step': 11247, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:26:48.181755', 'step': 11247, 'epoch': 2} {'type': 'loss', 'content': 0.14537808299064636, 'timestamp': '2025-10-01 04:26:48.207001', 'step': 11248, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:26:48.243048', 'step': 11248, 'epoch': 2} {'type': 'loss', 'content': 0.12376781553030014, 'timestamp': '2025-10-01 04:26:48.264076', 'step': 11249, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:26:48.314680', 'step': 11249, 'epoch': 2} {'type': 'loss', 'content': 0.08029406517744064, 'timestamp': '2025-10-01 04:26:48.326549', 'step': 11250, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:26:48.358570', 'step': 11250, 'epoch': 2} {'type': 'loss', 'content': 0.10859350115060806, 'timestamp': '2025-10-01 04:26:48.367418', 'step': 11251, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:48.399008', 'step': 11251, 'epoch': 2} {'type': 'loss', 'content': 0.08145590871572495, 'timestamp': '2025-10-01 04:26:48.440836', 'step': 11252, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:48.489746', 'step': 11252, 'epoch': 2} {'type': 'loss', 'content': 0.16925129294395447, 'timestamp': '2025-10-01 04:26:48.497456', 'step': 11253, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:48.534545', 'step': 11253, 'epoch': 2} {'type': 'loss', 'content': 0.16156940162181854, 'timestamp': '2025-10-01 04:26:48.544175', 'step': 11254, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:48.595211', 'step': 11254, 'epoch': 2} {'type': 'loss', 'content': 0.11957353353500366, 'timestamp': '2025-10-01 04:26:48.597793', 'step': 11255, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:48.629823', 'step': 11255, 'epoch': 2} {'type': 'loss', 'content': 0.09552185982465744, 'timestamp': '2025-10-01 04:26:48.654845', 'step': 11256, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:48.689125', 'step': 11256, 'epoch': 2} {'type': 'loss', 'content': 0.03856261074542999, 'timestamp': '2025-10-01 04:26:48.693525', 'step': 11257, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:48.724750', 'step': 11257, 'epoch': 2} {'type': 'loss', 'content': 0.09088506549596786, 'timestamp': '2025-10-01 04:26:48.727874', 'step': 11258, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:48.759970', 'step': 11258, 'epoch': 2} {'type': 'loss', 'content': 0.18558111786842346, 'timestamp': '2025-10-01 04:26:48.762849', 'step': 11259, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:48.794343', 'step': 11259, 'epoch': 2} {'type': 'loss', 'content': 0.056049685925245285, 'timestamp': '2025-10-01 04:26:48.818661', 'step': 11260, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:48.850647', 'step': 11260, 'epoch': 2} {'type': 'loss', 'content': 0.06374789774417877, 'timestamp': '2025-10-01 04:26:48.853421', 'step': 11261, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:48.884172', 'step': 11261, 'epoch': 2} {'type': 'loss', 'content': 0.06718992441892624, 'timestamp': '2025-10-01 04:26:48.886406', 'step': 11262, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:48.923682', 'step': 11262, 'epoch': 2} {'type': 'loss', 'content': 0.04753267765045166, 'timestamp': '2025-10-01 04:26:48.928141', 'step': 11263, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:48.962971', 'step': 11263, 'epoch': 2} {'type': 'loss', 'content': 0.14514033496379852, 'timestamp': '2025-10-01 04:26:49.001609', 'step': 11264, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:49.033415', 'step': 11264, 'epoch': 2} {'type': 'loss', 'content': 0.07174377143383026, 'timestamp': '2025-10-01 04:26:49.035749', 'step': 11265, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:49.066569', 'step': 11265, 'epoch': 2} {'type': 'loss', 'content': 0.08968877792358398, 'timestamp': '2025-10-01 04:26:49.070018', 'step': 11266, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:49.117735', 'step': 11266, 'epoch': 2} {'type': 'loss', 'content': 0.10032229870557785, 'timestamp': '2025-10-01 04:26:49.120155', 'step': 11267, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:49.155213', 'step': 11267, 'epoch': 2} {'type': 'loss', 'content': 0.02345910854637623, 'timestamp': '2025-10-01 04:26:49.181504', 'step': 11268, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:49.212596', 'step': 11268, 'epoch': 2} {'type': 'loss', 'content': 0.144448384642601, 'timestamp': '2025-10-01 04:26:49.215772', 'step': 11269, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:49.247189', 'step': 11269, 'epoch': 2} {'type': 'loss', 'content': 0.08900136500597, 'timestamp': '2025-10-01 04:26:49.257033', 'step': 11270, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:49.287596', 'step': 11270, 'epoch': 2} {'type': 'loss', 'content': 0.09429718554019928, 'timestamp': '2025-10-01 04:26:49.290805', 'step': 11271, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:49.330252', 'step': 11271, 'epoch': 2} {'type': 'loss', 'content': 0.13232584297657013, 'timestamp': '2025-10-01 04:26:49.355059', 'step': 11272, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:49.386810', 'step': 11272, 'epoch': 2} {'type': 'loss', 'content': 0.12304940074682236, 'timestamp': '2025-10-01 04:26:49.389260', 'step': 11273, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:49.421069', 'step': 11273, 'epoch': 2} {'type': 'loss', 'content': 0.0309641994535923, 'timestamp': '2025-10-01 04:26:49.424129', 'step': 11274, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:49.455502', 'step': 11274, 'epoch': 2} {'type': 'loss', 'content': 0.10538794845342636, 'timestamp': '2025-10-01 04:26:49.457826', 'step': 11275, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:26:49.489495', 'step': 11275, 'epoch': 2} {'type': 'loss', 'content': 0.12780198454856873, 'timestamp': '2025-10-01 04:26:49.518289', 'step': 11276, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:49.550021', 'step': 11276, 'epoch': 2} {'type': 'loss', 'content': 0.11412765830755234, 'timestamp': '2025-10-01 04:26:49.552529', 'step': 11277, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:26:49.584467', 'step': 11277, 'epoch': 2} {'type': 'loss', 'content': 0.02835819125175476, 'timestamp': '2025-10-01 04:26:49.586896', 'step': 11278, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:49.619994', 'step': 11278, 'epoch': 2} {'type': 'loss', 'content': 0.1569278985261917, 'timestamp': '2025-10-01 04:26:49.622788', 'step': 11279, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:49.653325', 'step': 11279, 'epoch': 2} {'type': 'loss', 'content': 0.13143378496170044, 'timestamp': '2025-10-01 04:26:49.677329', 'step': 11280, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:49.710619', 'step': 11280, 'epoch': 2} {'type': 'loss', 'content': 0.0871688723564148, 'timestamp': '2025-10-01 04:26:49.713691', 'step': 11281, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:49.744510', 'step': 11281, 'epoch': 2} {'type': 'loss', 'content': 0.13778573274612427, 'timestamp': '2025-10-01 04:26:49.746887', 'step': 11282, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:49.777360', 'step': 11282, 'epoch': 2} {'type': 'loss', 'content': 0.03505352512001991, 'timestamp': '2025-10-01 04:26:49.782188', 'step': 11283, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:49.832643', 'step': 11283, 'epoch': 2} {'type': 'loss', 'content': 0.14312608540058136, 'timestamp': '2025-10-01 04:26:49.856991', 'step': 11284, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:49.903261', 'step': 11284, 'epoch': 2} {'type': 'loss', 'content': 0.08170807361602783, 'timestamp': '2025-10-01 04:26:49.906564', 'step': 11285, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:26:49.957556', 'step': 11285, 'epoch': 2} {'type': 'loss', 'content': 0.15723814070224762, 'timestamp': '2025-10-01 04:26:49.960400', 'step': 11286, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:49.997217', 'step': 11286, 'epoch': 2} {'type': 'loss', 'content': 0.0517960749566555, 'timestamp': '2025-10-01 04:26:49.999605', 'step': 11287, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:50.040832', 'step': 11287, 'epoch': 2} {'type': 'loss', 'content': 0.17499980330467224, 'timestamp': '2025-10-01 04:26:50.064919', 'step': 11288, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:50.105561', 'step': 11288, 'epoch': 2} {'type': 'loss', 'content': 0.09044549614191055, 'timestamp': '2025-10-01 04:26:50.108195', 'step': 11289, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:50.166724', 'step': 11289, 'epoch': 2} {'type': 'loss', 'content': 0.08663850277662277, 'timestamp': '2025-10-01 04:26:50.173254', 'step': 11290, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:50.215382', 'step': 11290, 'epoch': 2} {'type': 'loss', 'content': 0.08041371405124664, 'timestamp': '2025-10-01 04:26:50.221496', 'step': 11291, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:50.263967', 'step': 11291, 'epoch': 2} {'type': 'loss', 'content': 0.13016465306282043, 'timestamp': '2025-10-01 04:26:50.290316', 'step': 11292, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:50.351473', 'step': 11292, 'epoch': 2} {'type': 'loss', 'content': 0.1139497309923172, 'timestamp': '2025-10-01 04:26:50.353783', 'step': 11293, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:50.385525', 'step': 11293, 'epoch': 2} {'type': 'loss', 'content': 0.13691647350788116, 'timestamp': '2025-10-01 04:26:50.390366', 'step': 11294, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:50.436512', 'step': 11294, 'epoch': 2} {'type': 'loss', 'content': 0.12896372377872467, 'timestamp': '2025-10-01 04:26:50.440541', 'step': 11295, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:50.498604', 'step': 11295, 'epoch': 2} {'type': 'loss', 'content': 0.1276942640542984, 'timestamp': '2025-10-01 04:26:50.529674', 'step': 11296, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:50.570711', 'step': 11296, 'epoch': 2} {'type': 'loss', 'content': 0.13397184014320374, 'timestamp': '2025-10-01 04:26:50.575148', 'step': 11297, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:50.620042', 'step': 11297, 'epoch': 2} {'type': 'loss', 'content': 0.08658646792173386, 'timestamp': '2025-10-01 04:26:50.622474', 'step': 11298, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:50.658665', 'step': 11298, 'epoch': 2} {'type': 'loss', 'content': 0.10923706740140915, 'timestamp': '2025-10-01 04:26:50.660911', 'step': 11299, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:50.704699', 'step': 11299, 'epoch': 2} {'type': 'loss', 'content': 0.1440098136663437, 'timestamp': '2025-10-01 04:26:50.728590', 'step': 11300, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:26:50.770603', 'step': 11300, 'epoch': 2} {'type': 'loss', 'content': 0.17203287780284882, 'timestamp': '2025-10-01 04:26:50.778580', 'step': 11301, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:50.817719', 'step': 11301, 'epoch': 2} {'type': 'loss', 'content': 0.06904461979866028, 'timestamp': '2025-10-01 04:26:50.820194', 'step': 11302, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:50.869344', 'step': 11302, 'epoch': 2} {'type': 'loss', 'content': 0.12220649421215057, 'timestamp': '2025-10-01 04:26:50.873452', 'step': 11303, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:50.907580', 'step': 11303, 'epoch': 2} {'type': 'loss', 'content': 0.15577608346939087, 'timestamp': '2025-10-01 04:26:50.935390', 'step': 11304, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:50.997651', 'step': 11304, 'epoch': 2} {'type': 'loss', 'content': 0.1940739005804062, 'timestamp': '2025-10-01 04:26:51.002959', 'step': 11305, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:51.058626', 'step': 11305, 'epoch': 2} {'type': 'loss', 'content': 0.09590812027454376, 'timestamp': '2025-10-01 04:26:51.060861', 'step': 11306, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:51.107915', 'step': 11306, 'epoch': 2} {'type': 'loss', 'content': 0.19340218603610992, 'timestamp': '2025-10-01 04:26:51.110300', 'step': 11307, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:51.165236', 'step': 11307, 'epoch': 2} {'type': 'loss', 'content': 0.20273736119270325, 'timestamp': '2025-10-01 04:26:51.191414', 'step': 11308, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:51.223444', 'step': 11308, 'epoch': 2} {'type': 'loss', 'content': 0.1550821214914322, 'timestamp': '2025-10-01 04:26:51.225750', 'step': 11309, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:51.257645', 'step': 11309, 'epoch': 2} {'type': 'loss', 'content': 0.11658254265785217, 'timestamp': '2025-10-01 04:26:51.261053', 'step': 11310, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:51.292574', 'step': 11310, 'epoch': 2} {'type': 'loss', 'content': 0.09813636541366577, 'timestamp': '2025-10-01 04:26:51.305837', 'step': 11311, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:51.339127', 'step': 11311, 'epoch': 2} {'type': 'loss', 'content': 0.08399790525436401, 'timestamp': '2025-10-01 04:26:51.363356', 'step': 11312, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:51.395106', 'step': 11312, 'epoch': 2} {'type': 'loss', 'content': 0.1516726016998291, 'timestamp': '2025-10-01 04:26:51.399995', 'step': 11313, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:26:51.432552', 'step': 11313, 'epoch': 2} {'type': 'loss', 'content': 0.1409541815519333, 'timestamp': '2025-10-01 04:26:51.434926', 'step': 11314, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:51.465616', 'step': 11314, 'epoch': 2} {'type': 'loss', 'content': 0.14198774099349976, 'timestamp': '2025-10-01 04:26:51.467925', 'step': 11315, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:51.498507', 'step': 11315, 'epoch': 2} {'type': 'loss', 'content': 0.14399820566177368, 'timestamp': '2025-10-01 04:26:51.522544', 'step': 11316, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:51.553135', 'step': 11316, 'epoch': 2} {'type': 'loss', 'content': 0.13464893400669098, 'timestamp': '2025-10-01 04:26:51.555582', 'step': 11317, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:51.590526', 'step': 11317, 'epoch': 2} {'type': 'loss', 'content': 0.12046336382627487, 'timestamp': '2025-10-01 04:26:51.592763', 'step': 11318, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:51.627155', 'step': 11318, 'epoch': 2} {'type': 'loss', 'content': 0.06538529694080353, 'timestamp': '2025-10-01 04:26:51.629385', 'step': 11319, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:26:51.660810', 'step': 11319, 'epoch': 2} {'type': 'loss', 'content': 0.06766058504581451, 'timestamp': '2025-10-01 04:26:51.684617', 'step': 11320, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:51.715853', 'step': 11320, 'epoch': 2} {'type': 'loss', 'content': 0.1871178299188614, 'timestamp': '2025-10-01 04:26:51.718242', 'step': 11321, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:51.749889', 'step': 11321, 'epoch': 2} {'type': 'loss', 'content': 0.12630215287208557, 'timestamp': '2025-10-01 04:26:51.752223', 'step': 11322, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:26:51.783342', 'step': 11322, 'epoch': 2} {'type': 'loss', 'content': 0.10912010818719864, 'timestamp': '2025-10-01 04:26:51.799878', 'step': 11323, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:26:51.836739', 'step': 11323, 'epoch': 2} {'type': 'loss', 'content': 0.04923202469944954, 'timestamp': '2025-10-01 04:26:51.860796', 'step': 11324, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:26:51.893998', 'step': 11324, 'epoch': 2} {'type': 'loss', 'content': 0.22808682918548584, 'timestamp': '2025-10-01 04:26:51.911766', 'step': 11325, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:51.944844', 'step': 11325, 'epoch': 2} {'type': 'loss', 'content': 0.2094331830739975, 'timestamp': '2025-10-01 04:26:51.946916', 'step': 11326, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:51.994341', 'step': 11326, 'epoch': 2} {'type': 'loss', 'content': 0.06588783860206604, 'timestamp': '2025-10-01 04:26:51.996755', 'step': 11327, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:26:52.030800', 'step': 11327, 'epoch': 2} {'type': 'loss', 'content': 0.04154273122549057, 'timestamp': '2025-10-01 04:26:52.054764', 'step': 11328, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:52.089319', 'step': 11328, 'epoch': 2} {'type': 'loss', 'content': 0.09786916524171829, 'timestamp': '2025-10-01 04:26:52.091655', 'step': 11329, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:52.129405', 'step': 11329, 'epoch': 2} {'type': 'loss', 'content': 0.1301477700471878, 'timestamp': '2025-10-01 04:26:52.131938', 'step': 11330, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:52.166879', 'step': 11330, 'epoch': 2} {'type': 'loss', 'content': 0.052473992109298706, 'timestamp': '2025-10-01 04:26:52.169430', 'step': 11331, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:26:52.204272', 'step': 11331, 'epoch': 2} {'type': 'loss', 'content': 0.22671934962272644, 'timestamp': '2025-10-01 04:26:52.228748', 'step': 11332, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:52.265307', 'step': 11332, 'epoch': 2} {'type': 'loss', 'content': 0.21167723834514618, 'timestamp': '2025-10-01 04:26:52.267550', 'step': 11333, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:52.299275', 'step': 11333, 'epoch': 2} {'type': 'loss', 'content': 0.11910118162631989, 'timestamp': '2025-10-01 04:26:52.301816', 'step': 11334, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:52.333891', 'step': 11334, 'epoch': 2} {'type': 'loss', 'content': 0.05473674088716507, 'timestamp': '2025-10-01 04:26:52.336200', 'step': 11335, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:52.370411', 'step': 11335, 'epoch': 2} {'type': 'loss', 'content': 0.0577053539454937, 'timestamp': '2025-10-01 04:26:52.394214', 'step': 11336, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:52.425326', 'step': 11336, 'epoch': 2} {'type': 'loss', 'content': 0.05687612295150757, 'timestamp': '2025-10-01 04:26:52.427456', 'step': 11337, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:52.459114', 'step': 11337, 'epoch': 2} {'type': 'loss', 'content': 0.09786402434110641, 'timestamp': '2025-10-01 04:26:52.476887', 'step': 11338, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:52.526586', 'step': 11338, 'epoch': 2} {'type': 'loss', 'content': 0.183250293135643, 'timestamp': '2025-10-01 04:26:52.528867', 'step': 11339, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:52.560235', 'step': 11339, 'epoch': 2} {'type': 'loss', 'content': 0.06497606635093689, 'timestamp': '2025-10-01 04:26:52.584112', 'step': 11340, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:52.616079', 'step': 11340, 'epoch': 2} {'type': 'loss', 'content': 0.12702462077140808, 'timestamp': '2025-10-01 04:26:52.618447', 'step': 11341, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:52.653985', 'step': 11341, 'epoch': 2} {'type': 'loss', 'content': 0.11276416480541229, 'timestamp': '2025-10-01 04:26:52.656358', 'step': 11342, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:52.689668', 'step': 11342, 'epoch': 2} {'type': 'loss', 'content': 0.06469558924436569, 'timestamp': '2025-10-01 04:26:52.692027', 'step': 11343, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:52.725660', 'step': 11343, 'epoch': 2} {'type': 'loss', 'content': 0.14902092516422272, 'timestamp': '2025-10-01 04:26:52.749495', 'step': 11344, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:52.781291', 'step': 11344, 'epoch': 2} {'type': 'loss', 'content': 0.1568368524312973, 'timestamp': '2025-10-01 04:26:52.783711', 'step': 11345, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:52.816020', 'step': 11345, 'epoch': 2} {'type': 'loss', 'content': 0.07056267559528351, 'timestamp': '2025-10-01 04:26:52.818642', 'step': 11346, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:52.861430', 'step': 11346, 'epoch': 2} {'type': 'loss', 'content': 0.10828342288732529, 'timestamp': '2025-10-01 04:26:52.863804', 'step': 11347, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:52.895651', 'step': 11347, 'epoch': 2} {'type': 'loss', 'content': 0.1331637054681778, 'timestamp': '2025-10-01 04:26:52.921391', 'step': 11348, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:52.973148', 'step': 11348, 'epoch': 2} {'type': 'loss', 'content': 0.13671857118606567, 'timestamp': '2025-10-01 04:26:52.975520', 'step': 11349, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:53.008302', 'step': 11349, 'epoch': 2} {'type': 'loss', 'content': 0.1377945840358734, 'timestamp': '2025-10-01 04:26:53.010792', 'step': 11350, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:53.044761', 'step': 11350, 'epoch': 2} {'type': 'loss', 'content': 0.12638847529888153, 'timestamp': '2025-10-01 04:26:53.047455', 'step': 11351, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:26:53.080324', 'step': 11351, 'epoch': 2} {'type': 'loss', 'content': 0.1485304981470108, 'timestamp': '2025-10-01 04:26:53.104096', 'step': 11352, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:53.137026', 'step': 11352, 'epoch': 2} {'type': 'loss', 'content': 0.05526471510529518, 'timestamp': '2025-10-01 04:26:53.139436', 'step': 11353, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:26:53.174800', 'step': 11353, 'epoch': 2} {'type': 'loss', 'content': 0.11965478211641312, 'timestamp': '2025-10-01 04:26:53.177225', 'step': 11354, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:26:53.208613', 'step': 11354, 'epoch': 2} {'type': 'loss', 'content': 0.12812471389770508, 'timestamp': '2025-10-01 04:26:53.210841', 'step': 11355, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:53.243075', 'step': 11355, 'epoch': 2} {'type': 'loss', 'content': 0.08539360761642456, 'timestamp': '2025-10-01 04:26:53.266820', 'step': 11356, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:53.298684', 'step': 11356, 'epoch': 2} {'type': 'loss', 'content': 0.0948384553194046, 'timestamp': '2025-10-01 04:26:53.300924', 'step': 11357, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:53.354795', 'step': 11357, 'epoch': 2} {'type': 'loss', 'content': 0.08107613027095795, 'timestamp': '2025-10-01 04:26:53.357139', 'step': 11358, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:53.391007', 'step': 11358, 'epoch': 2} {'type': 'loss', 'content': 0.09119857102632523, 'timestamp': '2025-10-01 04:26:53.393509', 'step': 11359, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:26:53.429961', 'step': 11359, 'epoch': 2} {'type': 'loss', 'content': 0.147613987326622, 'timestamp': '2025-10-01 04:26:53.453760', 'step': 11360, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:53.485534', 'step': 11360, 'epoch': 2} {'type': 'loss', 'content': 0.15749476850032806, 'timestamp': '2025-10-01 04:26:53.487876', 'step': 11361, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:53.518405', 'step': 11361, 'epoch': 2} {'type': 'loss', 'content': 0.0631953626871109, 'timestamp': '2025-10-01 04:26:53.521723', 'step': 11362, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:53.555111', 'step': 11362, 'epoch': 2} {'type': 'loss', 'content': 0.09152963757514954, 'timestamp': '2025-10-01 04:26:53.575483', 'step': 11363, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:53.607596', 'step': 11363, 'epoch': 2} {'type': 'loss', 'content': 0.07356330007314682, 'timestamp': '2025-10-01 04:26:53.631505', 'step': 11364, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:53.672124', 'step': 11364, 'epoch': 2} {'type': 'loss', 'content': 0.12659452855587006, 'timestamp': '2025-10-01 04:26:53.674425', 'step': 11365, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:53.705136', 'step': 11365, 'epoch': 2} {'type': 'loss', 'content': 0.08716246485710144, 'timestamp': '2025-10-01 04:26:53.711070', 'step': 11366, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:26:53.741551', 'step': 11366, 'epoch': 2} {'type': 'loss', 'content': 0.11599913239479065, 'timestamp': '2025-10-01 04:26:53.743957', 'step': 11367, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:26:53.775111', 'step': 11367, 'epoch': 2} {'type': 'loss', 'content': 0.05407878756523132, 'timestamp': '2025-10-01 04:26:53.799028', 'step': 11368, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:53.835440', 'step': 11368, 'epoch': 2} {'type': 'loss', 'content': 0.14314314723014832, 'timestamp': '2025-10-01 04:26:53.837786', 'step': 11369, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:53.868799', 'step': 11369, 'epoch': 2} {'type': 'loss', 'content': 0.13834014534950256, 'timestamp': '2025-10-01 04:26:53.871024', 'step': 11370, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:53.901880', 'step': 11370, 'epoch': 2} {'type': 'loss', 'content': 0.09935488551855087, 'timestamp': '2025-10-01 04:26:53.904190', 'step': 11371, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:26:53.946267', 'step': 11371, 'epoch': 2} {'type': 'loss', 'content': 0.0996755063533783, 'timestamp': '2025-10-01 04:26:53.969952', 'step': 11372, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:54.002172', 'step': 11372, 'epoch': 2} {'type': 'loss', 'content': 0.09136321395635605, 'timestamp': '2025-10-01 04:26:54.004496', 'step': 11373, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:54.035962', 'step': 11373, 'epoch': 2} {'type': 'loss', 'content': 0.06597313284873962, 'timestamp': '2025-10-01 04:26:54.038404', 'step': 11374, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:54.081514', 'step': 11374, 'epoch': 2} {'type': 'loss', 'content': 0.15210917592048645, 'timestamp': '2025-10-01 04:26:54.083971', 'step': 11375, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:54.114343', 'step': 11375, 'epoch': 2} {'type': 'loss', 'content': 0.05018927901983261, 'timestamp': '2025-10-01 04:26:54.138080', 'step': 11376, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:26:54.170502', 'step': 11376, 'epoch': 2} {'type': 'loss', 'content': 0.14452609419822693, 'timestamp': '2025-10-01 04:26:54.172969', 'step': 11377, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:26:54.204786', 'step': 11377, 'epoch': 2} {'type': 'loss', 'content': 0.19527071714401245, 'timestamp': '2025-10-01 04:26:54.207309', 'step': 11378, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:54.251925', 'step': 11378, 'epoch': 2} {'type': 'loss', 'content': 0.20840592682361603, 'timestamp': '2025-10-01 04:26:54.254278', 'step': 11379, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:54.284874', 'step': 11379, 'epoch': 2} {'type': 'loss', 'content': 0.11926194280385971, 'timestamp': '2025-10-01 04:26:54.308616', 'step': 11380, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:26:54.339913', 'step': 11380, 'epoch': 2} {'type': 'loss', 'content': 0.10232414305210114, 'timestamp': '2025-10-01 04:26:54.342253', 'step': 11381, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:54.374140', 'step': 11381, 'epoch': 2} {'type': 'loss', 'content': 0.14030210673809052, 'timestamp': '2025-10-01 04:26:54.376747', 'step': 11382, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:26:54.408103', 'step': 11382, 'epoch': 2} {'type': 'loss', 'content': 0.09793195128440857, 'timestamp': '2025-10-01 04:26:54.410919', 'step': 11383, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:54.443050', 'step': 11383, 'epoch': 2} {'type': 'loss', 'content': 0.1092219352722168, 'timestamp': '2025-10-01 04:26:54.467112', 'step': 11384, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:54.498176', 'step': 11384, 'epoch': 2} {'type': 'loss', 'content': 0.06821446120738983, 'timestamp': '2025-10-01 04:26:54.500790', 'step': 11385, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:54.531465', 'step': 11385, 'epoch': 2} {'type': 'loss', 'content': 0.1578245609998703, 'timestamp': '2025-10-01 04:26:54.533858', 'step': 11386, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:54.566759', 'step': 11386, 'epoch': 2} {'type': 'loss', 'content': 0.058068860322237015, 'timestamp': '2025-10-01 04:26:54.569029', 'step': 11387, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:54.604039', 'step': 11387, 'epoch': 2} {'type': 'loss', 'content': 0.07949953526258469, 'timestamp': '2025-10-01 04:26:54.628125', 'step': 11388, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:54.659514', 'step': 11388, 'epoch': 2} {'type': 'loss', 'content': 0.0665992796421051, 'timestamp': '2025-10-01 04:26:54.662015', 'step': 11389, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:54.693598', 'step': 11389, 'epoch': 2} {'type': 'loss', 'content': 0.08206215500831604, 'timestamp': '2025-10-01 04:26:54.696522', 'step': 11390, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:54.726758', 'step': 11390, 'epoch': 2} {'type': 'loss', 'content': 0.16069015860557556, 'timestamp': '2025-10-01 04:26:54.729101', 'step': 11391, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:54.778812', 'step': 11391, 'epoch': 2} {'type': 'loss', 'content': 0.09856966882944107, 'timestamp': '2025-10-01 04:26:54.802552', 'step': 11392, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:54.840081', 'step': 11392, 'epoch': 2} {'type': 'loss', 'content': 0.11249809712171555, 'timestamp': '2025-10-01 04:26:54.845550', 'step': 11393, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:54.876854', 'step': 11393, 'epoch': 2} {'type': 'loss', 'content': 0.09212519228458405, 'timestamp': '2025-10-01 04:26:54.879145', 'step': 11394, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:54.915033', 'step': 11394, 'epoch': 2} {'type': 'loss', 'content': 0.15148963034152985, 'timestamp': '2025-10-01 04:26:54.917520', 'step': 11395, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:54.957146', 'step': 11395, 'epoch': 2} {'type': 'loss', 'content': 0.04170596972107887, 'timestamp': '2025-10-01 04:26:54.981148', 'step': 11396, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:55.012201', 'step': 11396, 'epoch': 2} {'type': 'loss', 'content': 0.11790711432695389, 'timestamp': '2025-10-01 04:26:55.019890', 'step': 11397, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:55.054961', 'step': 11397, 'epoch': 2} {'type': 'loss', 'content': 0.11612549424171448, 'timestamp': '2025-10-01 04:26:55.057566', 'step': 11398, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:55.102664', 'step': 11398, 'epoch': 2} {'type': 'loss', 'content': 0.1299183964729309, 'timestamp': '2025-10-01 04:26:55.105123', 'step': 11399, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:26:55.135870', 'step': 11399, 'epoch': 2} {'type': 'loss', 'content': 0.09696599841117859, 'timestamp': '2025-10-01 04:26:55.160344', 'step': 11400, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:55.191100', 'step': 11400, 'epoch': 2} {'type': 'loss', 'content': 0.11280760914087296, 'timestamp': '2025-10-01 04:26:55.193611', 'step': 11401, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:55.225534', 'step': 11401, 'epoch': 2} {'type': 'loss', 'content': 0.16994960606098175, 'timestamp': '2025-10-01 04:26:55.227902', 'step': 11402, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:55.258770', 'step': 11402, 'epoch': 2} {'type': 'loss', 'content': 0.12588956952095032, 'timestamp': '2025-10-01 04:26:55.261327', 'step': 11403, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:55.319476', 'step': 11403, 'epoch': 2} {'type': 'loss', 'content': 0.12378828227519989, 'timestamp': '2025-10-01 04:26:55.344701', 'step': 11404, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:55.376470', 'step': 11404, 'epoch': 2} {'type': 'loss', 'content': 0.14157424867153168, 'timestamp': '2025-10-01 04:26:55.378775', 'step': 11405, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:55.408898', 'step': 11405, 'epoch': 2} {'type': 'loss', 'content': 0.1617530733346939, 'timestamp': '2025-10-01 04:26:55.411203', 'step': 11406, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:26:55.443883', 'step': 11406, 'epoch': 2} {'type': 'loss', 'content': 0.10819070041179657, 'timestamp': '2025-10-01 04:26:55.446695', 'step': 11407, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:55.478052', 'step': 11407, 'epoch': 2} {'type': 'loss', 'content': 0.17420253157615662, 'timestamp': '2025-10-01 04:26:55.501936', 'step': 11408, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:26:55.533303', 'step': 11408, 'epoch': 2} {'type': 'loss', 'content': 0.0812719538807869, 'timestamp': '2025-10-01 04:26:55.535936', 'step': 11409, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:55.566785', 'step': 11409, 'epoch': 2} {'type': 'loss', 'content': 0.10880911350250244, 'timestamp': '2025-10-01 04:26:55.569464', 'step': 11410, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:55.600809', 'step': 11410, 'epoch': 2} {'type': 'loss', 'content': 0.24498465657234192, 'timestamp': '2025-10-01 04:26:55.603401', 'step': 11411, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:55.633995', 'step': 11411, 'epoch': 2} {'type': 'loss', 'content': 0.11523737758398056, 'timestamp': '2025-10-01 04:26:55.657928', 'step': 11412, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:55.692982', 'step': 11412, 'epoch': 2} {'type': 'loss', 'content': 0.14182014763355255, 'timestamp': '2025-10-01 04:26:55.695203', 'step': 11413, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:26:55.727785', 'step': 11413, 'epoch': 2} {'type': 'loss', 'content': 0.0308036208152771, 'timestamp': '2025-10-01 04:26:55.730150', 'step': 11414, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:55.765315', 'step': 11414, 'epoch': 2} {'type': 'loss', 'content': 0.07117168605327606, 'timestamp': '2025-10-01 04:26:55.767772', 'step': 11415, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:55.800510', 'step': 11415, 'epoch': 2} {'type': 'loss', 'content': 0.057675402611494064, 'timestamp': '2025-10-01 04:26:55.824400', 'step': 11416, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:55.855754', 'step': 11416, 'epoch': 2} {'type': 'loss', 'content': 0.07259096205234528, 'timestamp': '2025-10-01 04:26:55.858424', 'step': 11417, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:55.890007', 'step': 11417, 'epoch': 2} {'type': 'loss', 'content': 0.14269272983074188, 'timestamp': '2025-10-01 04:26:55.893297', 'step': 11418, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:55.925738', 'step': 11418, 'epoch': 2} {'type': 'loss', 'content': 0.11323263496160507, 'timestamp': '2025-10-01 04:26:55.928048', 'step': 11419, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:55.959632', 'step': 11419, 'epoch': 2} {'type': 'loss', 'content': 0.04786821827292442, 'timestamp': '2025-10-01 04:26:55.983664', 'step': 11420, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:56.014696', 'step': 11420, 'epoch': 2} {'type': 'loss', 'content': 0.16487564146518707, 'timestamp': '2025-10-01 04:26:56.017469', 'step': 11421, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:56.059645', 'step': 11421, 'epoch': 2} {'type': 'loss', 'content': 0.07940220087766647, 'timestamp': '2025-10-01 04:26:56.062009', 'step': 11422, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:26:56.093414', 'step': 11422, 'epoch': 2} {'type': 'loss', 'content': 0.11990216374397278, 'timestamp': '2025-10-01 04:26:56.097869', 'step': 11423, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:56.129219', 'step': 11423, 'epoch': 2} {'type': 'loss', 'content': 0.0971367359161377, 'timestamp': '2025-10-01 04:26:56.153792', 'step': 11424, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:56.184542', 'step': 11424, 'epoch': 2} {'type': 'loss', 'content': 0.09103688597679138, 'timestamp': '2025-10-01 04:26:56.187306', 'step': 11425, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:56.220002', 'step': 11425, 'epoch': 2} {'type': 'loss', 'content': 0.21863655745983124, 'timestamp': '2025-10-01 04:26:56.222934', 'step': 11426, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:56.255745', 'step': 11426, 'epoch': 2} {'type': 'loss', 'content': 0.060580722987651825, 'timestamp': '2025-10-01 04:26:56.258365', 'step': 11427, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:56.291233', 'step': 11427, 'epoch': 2} {'type': 'loss', 'content': 0.07630699127912521, 'timestamp': '2025-10-01 04:26:56.315838', 'step': 11428, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:56.347182', 'step': 11428, 'epoch': 2} {'type': 'loss', 'content': 0.19226311147212982, 'timestamp': '2025-10-01 04:26:56.350420', 'step': 11429, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:56.381289', 'step': 11429, 'epoch': 2} {'type': 'loss', 'content': 0.16291117668151855, 'timestamp': '2025-10-01 04:26:56.393414', 'step': 11430, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:26:56.425609', 'step': 11430, 'epoch': 2} {'type': 'loss', 'content': 0.05716516077518463, 'timestamp': '2025-10-01 04:26:56.428658', 'step': 11431, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:26:56.461379', 'step': 11431, 'epoch': 2} {'type': 'loss', 'content': 0.10358516126871109, 'timestamp': '2025-10-01 04:26:56.498674', 'step': 11432, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:26:56.533092', 'step': 11432, 'epoch': 2} {'type': 'loss', 'content': 0.13636718690395355, 'timestamp': '2025-10-01 04:26:56.538217', 'step': 11433, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:56.569216', 'step': 11433, 'epoch': 2} {'type': 'loss', 'content': 0.10885080695152283, 'timestamp': '2025-10-01 04:26:56.571856', 'step': 11434, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:26:56.602769', 'step': 11434, 'epoch': 2} {'type': 'loss', 'content': 0.11418836563825607, 'timestamp': '2025-10-01 04:26:56.606299', 'step': 11435, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:26:56.638103', 'step': 11435, 'epoch': 2} {'type': 'loss', 'content': 0.08194521069526672, 'timestamp': '2025-10-01 04:26:56.662339', 'step': 11436, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:56.694299', 'step': 11436, 'epoch': 2} {'type': 'loss', 'content': 0.13488948345184326, 'timestamp': '2025-10-01 04:26:56.697010', 'step': 11437, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:56.728957', 'step': 11437, 'epoch': 2} {'type': 'loss', 'content': 0.09893796592950821, 'timestamp': '2025-10-01 04:26:56.731442', 'step': 11438, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:56.762673', 'step': 11438, 'epoch': 2} {'type': 'loss', 'content': 0.17162932455539703, 'timestamp': '2025-10-01 04:26:56.765362', 'step': 11439, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:56.798500', 'step': 11439, 'epoch': 2} {'type': 'loss', 'content': 0.09831732511520386, 'timestamp': '2025-10-01 04:26:56.822862', 'step': 11440, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:56.871126', 'step': 11440, 'epoch': 2} {'type': 'loss', 'content': 0.07485441118478775, 'timestamp': '2025-10-01 04:26:56.874216', 'step': 11441, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-10-01 04:26:56.905590', 'step': 11441, 'epoch': 2} {'type': 'loss', 'content': 0.10792391002178192, 'timestamp': '2025-10-01 04:26:56.910293', 'step': 11442, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:56.943545', 'step': 11442, 'epoch': 2} {'type': 'loss', 'content': 0.08971807360649109, 'timestamp': '2025-10-01 04:26:56.946769', 'step': 11443, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:56.978394', 'step': 11443, 'epoch': 2} {'type': 'loss', 'content': 0.05007512494921684, 'timestamp': '2025-10-01 04:26:57.002607', 'step': 11444, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:57.034239', 'step': 11444, 'epoch': 2} {'type': 'loss', 'content': 0.08754850178956985, 'timestamp': '2025-10-01 04:26:57.036870', 'step': 11445, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:26:57.067833', 'step': 11445, 'epoch': 2} {'type': 'loss', 'content': 0.05383092164993286, 'timestamp': '2025-10-01 04:26:57.070747', 'step': 11446, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:57.110652', 'step': 11446, 'epoch': 2} {'type': 'loss', 'content': 0.08925063908100128, 'timestamp': '2025-10-01 04:26:57.114065', 'step': 11447, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:57.151692', 'step': 11447, 'epoch': 2} {'type': 'loss', 'content': 0.054520852863788605, 'timestamp': '2025-10-01 04:26:57.176294', 'step': 11448, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:57.208978', 'step': 11448, 'epoch': 2} {'type': 'loss', 'content': 0.10207319259643555, 'timestamp': '2025-10-01 04:26:57.212099', 'step': 11449, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:26:57.243761', 'step': 11449, 'epoch': 2} {'type': 'loss', 'content': 0.08318828046321869, 'timestamp': '2025-10-01 04:26:57.247950', 'step': 11450, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:57.279474', 'step': 11450, 'epoch': 2} {'type': 'loss', 'content': 0.10893208533525467, 'timestamp': '2025-10-01 04:26:57.282255', 'step': 11451, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:57.315957', 'step': 11451, 'epoch': 2} {'type': 'loss', 'content': 0.048232126981019974, 'timestamp': '2025-10-01 04:26:57.340477', 'step': 11452, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:57.374820', 'step': 11452, 'epoch': 2} {'type': 'loss', 'content': 0.05988968536257744, 'timestamp': '2025-10-01 04:26:57.377253', 'step': 11453, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:57.409021', 'step': 11453, 'epoch': 2} {'type': 'loss', 'content': 0.07191900163888931, 'timestamp': '2025-10-01 04:26:57.411361', 'step': 11454, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:57.441908', 'step': 11454, 'epoch': 2} {'type': 'loss', 'content': 0.036991268396377563, 'timestamp': '2025-10-01 04:26:57.446830', 'step': 11455, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:57.478155', 'step': 11455, 'epoch': 2} {'type': 'loss', 'content': 0.1275189071893692, 'timestamp': '2025-10-01 04:26:57.502605', 'step': 11456, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:57.533478', 'step': 11456, 'epoch': 2} {'type': 'loss', 'content': 0.06339584290981293, 'timestamp': '2025-10-01 04:26:57.536388', 'step': 11457, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:57.567952', 'step': 11457, 'epoch': 2} {'type': 'loss', 'content': 0.15769854187965393, 'timestamp': '2025-10-01 04:26:57.570742', 'step': 11458, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:57.602784', 'step': 11458, 'epoch': 2} {'type': 'loss', 'content': 0.08758722990751266, 'timestamp': '2025-10-01 04:26:57.605386', 'step': 11459, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:26:57.638019', 'step': 11459, 'epoch': 2} {'type': 'loss', 'content': 0.14890676736831665, 'timestamp': '2025-10-01 04:26:57.662145', 'step': 11460, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:57.693407', 'step': 11460, 'epoch': 2} {'type': 'loss', 'content': 0.09929068386554718, 'timestamp': '2025-10-01 04:26:57.717881', 'step': 11461, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:57.751062', 'step': 11461, 'epoch': 2} {'type': 'loss', 'content': 0.052742186933755875, 'timestamp': '2025-10-01 04:26:57.754879', 'step': 11462, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:57.786721', 'step': 11462, 'epoch': 2} {'type': 'loss', 'content': 0.09475833177566528, 'timestamp': '2025-10-01 04:26:57.789133', 'step': 11463, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:26:57.825468', 'step': 11463, 'epoch': 2} {'type': 'loss', 'content': 0.07743176072835922, 'timestamp': '2025-10-01 04:26:57.850136', 'step': 11464, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:26:57.887003', 'step': 11464, 'epoch': 2} {'type': 'loss', 'content': 0.1969219446182251, 'timestamp': '2025-10-01 04:26:57.902914', 'step': 11465, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:57.940842', 'step': 11465, 'epoch': 2} {'type': 'loss', 'content': 0.20406824350357056, 'timestamp': '2025-10-01 04:26:57.943500', 'step': 11466, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:57.976022', 'step': 11466, 'epoch': 2} {'type': 'loss', 'content': 0.12743374705314636, 'timestamp': '2025-10-01 04:26:57.978761', 'step': 11467, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:58.009671', 'step': 11467, 'epoch': 2} {'type': 'loss', 'content': 0.09754287451505661, 'timestamp': '2025-10-01 04:26:58.034783', 'step': 11468, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:58.067440', 'step': 11468, 'epoch': 2} {'type': 'loss', 'content': 0.17018119990825653, 'timestamp': '2025-10-01 04:26:58.069760', 'step': 11469, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:58.115265', 'step': 11469, 'epoch': 2} {'type': 'loss', 'content': 0.08141151815652847, 'timestamp': '2025-10-01 04:26:58.122406', 'step': 11470, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:26:58.153684', 'step': 11470, 'epoch': 2} {'type': 'loss', 'content': 0.10839881002902985, 'timestamp': '2025-10-01 04:26:58.155857', 'step': 11471, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:58.187471', 'step': 11471, 'epoch': 2} {'type': 'loss', 'content': 0.09217238426208496, 'timestamp': '2025-10-01 04:26:58.211389', 'step': 11472, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:58.242804', 'step': 11472, 'epoch': 2} {'type': 'loss', 'content': 0.21243363618850708, 'timestamp': '2025-10-01 04:26:58.245061', 'step': 11473, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:58.286680', 'step': 11473, 'epoch': 2} {'type': 'loss', 'content': 0.08447111397981644, 'timestamp': '2025-10-01 04:26:58.288886', 'step': 11474, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:26:58.328628', 'step': 11474, 'epoch': 2} {'type': 'loss', 'content': 0.12656453251838684, 'timestamp': '2025-10-01 04:26:58.331694', 'step': 11475, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:26:58.367087', 'step': 11475, 'epoch': 2} {'type': 'loss', 'content': 0.11893520504236221, 'timestamp': '2025-10-01 04:26:58.392734', 'step': 11476, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:58.428266', 'step': 11476, 'epoch': 2} {'type': 'loss', 'content': 0.050945620983839035, 'timestamp': '2025-10-01 04:26:58.431435', 'step': 11477, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:58.466377', 'step': 11477, 'epoch': 2} {'type': 'loss', 'content': 0.1327974945306778, 'timestamp': '2025-10-01 04:26:58.469053', 'step': 11478, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:58.502017', 'step': 11478, 'epoch': 2} {'type': 'loss', 'content': 0.1221088245511055, 'timestamp': '2025-10-01 04:26:58.511468', 'step': 11479, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:58.547941', 'step': 11479, 'epoch': 2} {'type': 'loss', 'content': 0.0550781786441803, 'timestamp': '2025-10-01 04:26:58.571903', 'step': 11480, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:58.603887', 'step': 11480, 'epoch': 2} {'type': 'loss', 'content': 0.0888344943523407, 'timestamp': '2025-10-01 04:26:58.609293', 'step': 11481, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:58.640593', 'step': 11481, 'epoch': 2} {'type': 'loss', 'content': 0.22148968279361725, 'timestamp': '2025-10-01 04:26:58.642934', 'step': 11482, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:58.674372', 'step': 11482, 'epoch': 2} {'type': 'loss', 'content': 0.1324811577796936, 'timestamp': '2025-10-01 04:26:58.676797', 'step': 11483, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:58.709719', 'step': 11483, 'epoch': 2} {'type': 'loss', 'content': 0.11409687995910645, 'timestamp': '2025-10-01 04:26:58.736034', 'step': 11484, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:58.766730', 'step': 11484, 'epoch': 2} {'type': 'loss', 'content': 0.04172377288341522, 'timestamp': '2025-10-01 04:26:58.769048', 'step': 11485, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:58.800747', 'step': 11485, 'epoch': 2} {'type': 'loss', 'content': 0.07748549431562424, 'timestamp': '2025-10-01 04:26:58.803208', 'step': 11486, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:58.836990', 'step': 11486, 'epoch': 2} {'type': 'loss', 'content': 0.054523028433322906, 'timestamp': '2025-10-01 04:26:58.839564', 'step': 11487, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:58.871702', 'step': 11487, 'epoch': 2} {'type': 'loss', 'content': 0.07967472076416016, 'timestamp': '2025-10-01 04:26:58.897886', 'step': 11488, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:58.931804', 'step': 11488, 'epoch': 2} {'type': 'loss', 'content': 0.15430587530136108, 'timestamp': '2025-10-01 04:26:58.935871', 'step': 11489, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:58.977811', 'step': 11489, 'epoch': 2} {'type': 'loss', 'content': 0.09114021807909012, 'timestamp': '2025-10-01 04:26:58.981228', 'step': 11490, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:26:59.017304', 'step': 11490, 'epoch': 2} {'type': 'loss', 'content': 0.08726692944765091, 'timestamp': '2025-10-01 04:26:59.019719', 'step': 11491, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:59.050588', 'step': 11491, 'epoch': 2} {'type': 'loss', 'content': 0.10708868503570557, 'timestamp': '2025-10-01 04:26:59.074959', 'step': 11492, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:26:59.107526', 'step': 11492, 'epoch': 2} {'type': 'loss', 'content': 0.13987678289413452, 'timestamp': '2025-10-01 04:26:59.112207', 'step': 11493, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:26:59.146428', 'step': 11493, 'epoch': 2} {'type': 'loss', 'content': 0.11625871062278748, 'timestamp': '2025-10-01 04:26:59.148876', 'step': 11494, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:59.180390', 'step': 11494, 'epoch': 2} {'type': 'loss', 'content': 0.09981617331504822, 'timestamp': '2025-10-01 04:26:59.182662', 'step': 11495, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:26:59.214693', 'step': 11495, 'epoch': 2} {'type': 'loss', 'content': 0.126967191696167, 'timestamp': '2025-10-01 04:26:59.238413', 'step': 11496, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:59.269645', 'step': 11496, 'epoch': 2} {'type': 'loss', 'content': 0.10522031038999557, 'timestamp': '2025-10-01 04:26:59.271908', 'step': 11497, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:59.304133', 'step': 11497, 'epoch': 2} {'type': 'loss', 'content': 0.07278996706008911, 'timestamp': '2025-10-01 04:26:59.306865', 'step': 11498, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:26:59.351077', 'step': 11498, 'epoch': 2} {'type': 'loss', 'content': 0.14821673929691315, 'timestamp': '2025-10-01 04:26:59.355386', 'step': 11499, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:26:59.386543', 'step': 11499, 'epoch': 2} {'type': 'loss', 'content': 0.0473690964281559, 'timestamp': '2025-10-01 04:26:59.411049', 'step': 11500, 'epoch': 2} {'type': 'info', 'content': 'Checkpoint saved at step 11500', 'timestamp': '2025-10-01 04:27:04.450207', 'step': 11500, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:04.529439', 'step': 11500, 'epoch': 2} {'type': 'loss', 'content': 0.13367144763469696, 'timestamp': '2025-10-01 04:27:04.531953', 'step': 11501, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:04.567459', 'step': 11501, 'epoch': 2} {'type': 'loss', 'content': 0.10000792890787125, 'timestamp': '2025-10-01 04:27:04.569910', 'step': 11502, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:04.609201', 'step': 11502, 'epoch': 2} {'type': 'loss', 'content': 0.17181222140789032, 'timestamp': '2025-10-01 04:27:04.611645', 'step': 11503, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:27:04.653262', 'step': 11503, 'epoch': 2} {'type': 'loss', 'content': 0.0883895605802536, 'timestamp': '2025-10-01 04:27:04.680083', 'step': 11504, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:27:04.816472', 'step': 11504, 'epoch': 2} {'type': 'loss', 'content': 0.10698214173316956, 'timestamp': '2025-10-01 04:27:04.818885', 'step': 11505, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:04.929463', 'step': 11505, 'epoch': 2} {'type': 'loss', 'content': 0.04893100634217262, 'timestamp': '2025-10-01 04:27:04.942679', 'step': 11506, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:27:04.982470', 'step': 11506, 'epoch': 2} {'type': 'loss', 'content': 0.06325589865446091, 'timestamp': '2025-10-01 04:27:04.984948', 'step': 11507, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:27:05.030922', 'step': 11507, 'epoch': 2} {'type': 'loss', 'content': 0.13418999314308167, 'timestamp': '2025-10-01 04:27:05.055252', 'step': 11508, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:05.094755', 'step': 11508, 'epoch': 2} {'type': 'loss', 'content': 0.08166086673736572, 'timestamp': '2025-10-01 04:27:05.097174', 'step': 11509, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:05.130887', 'step': 11509, 'epoch': 2} {'type': 'loss', 'content': 0.1732613444328308, 'timestamp': '2025-10-01 04:27:05.133227', 'step': 11510, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:05.234233', 'step': 11510, 'epoch': 2} {'type': 'loss', 'content': 0.05687396973371506, 'timestamp': '2025-10-01 04:27:05.236976', 'step': 11511, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:27:05.377849', 'step': 11511, 'epoch': 2} {'type': 'loss', 'content': 0.12506216764450073, 'timestamp': '2025-10-01 04:27:05.401671', 'step': 11512, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:27:05.438070', 'step': 11512, 'epoch': 2} {'type': 'loss', 'content': 0.036953404545784, 'timestamp': '2025-10-01 04:27:05.440163', 'step': 11513, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:05.479826', 'step': 11513, 'epoch': 2} {'type': 'loss', 'content': 0.11897417902946472, 'timestamp': '2025-10-01 04:27:05.482858', 'step': 11514, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:27:05.520089', 'step': 11514, 'epoch': 2} {'type': 'loss', 'content': 0.10563568770885468, 'timestamp': '2025-10-01 04:27:05.522568', 'step': 11515, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:05.577537', 'step': 11515, 'epoch': 2} {'type': 'loss', 'content': 0.06475336104631424, 'timestamp': '2025-10-01 04:27:05.601815', 'step': 11516, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:05.651733', 'step': 11516, 'epoch': 2} {'type': 'loss', 'content': 0.13613827526569366, 'timestamp': '2025-10-01 04:27:05.670651', 'step': 11517, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:05.850265', 'step': 11517, 'epoch': 2} {'type': 'loss', 'content': 0.1004570722579956, 'timestamp': '2025-10-01 04:27:05.853329', 'step': 11518, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:27:05.915575', 'step': 11518, 'epoch': 2} {'type': 'loss', 'content': 0.13588373363018036, 'timestamp': '2025-10-01 04:27:05.918667', 'step': 11519, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:05.949680', 'step': 11519, 'epoch': 2} {'type': 'loss', 'content': 0.14995741844177246, 'timestamp': '2025-10-01 04:27:05.973599', 'step': 11520, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:27:06.003495', 'step': 11520, 'epoch': 2} {'type': 'loss', 'content': 0.14296703040599823, 'timestamp': '2025-10-01 04:27:06.006233', 'step': 11521, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:27:06.037461', 'step': 11521, 'epoch': 2} {'type': 'loss', 'content': 0.13213196396827698, 'timestamp': '2025-10-01 04:27:06.040433', 'step': 11522, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:06.071299', 'step': 11522, 'epoch': 2} {'type': 'loss', 'content': 0.11366549879312515, 'timestamp': '2025-10-01 04:27:06.074449', 'step': 11523, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:27:06.105188', 'step': 11523, 'epoch': 2} {'type': 'loss', 'content': 0.13247698545455933, 'timestamp': '2025-10-01 04:27:06.129224', 'step': 11524, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:06.166688', 'step': 11524, 'epoch': 2} {'type': 'loss', 'content': 0.07481958717107773, 'timestamp': '2025-10-01 04:27:06.170566', 'step': 11525, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:06.208647', 'step': 11525, 'epoch': 2} {'type': 'loss', 'content': 0.09598510712385178, 'timestamp': '2025-10-01 04:27:06.210882', 'step': 11526, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:27:06.243720', 'step': 11526, 'epoch': 2} {'type': 'loss', 'content': 0.084699347615242, 'timestamp': '2025-10-01 04:27:06.255560', 'step': 11527, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:06.286205', 'step': 11527, 'epoch': 2} {'type': 'loss', 'content': 0.08953829854726791, 'timestamp': '2025-10-01 04:27:06.314521', 'step': 11528, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:27:06.348523', 'step': 11528, 'epoch': 2} {'type': 'loss', 'content': 0.11546934396028519, 'timestamp': '2025-10-01 04:27:06.354825', 'step': 11529, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:27:06.386258', 'step': 11529, 'epoch': 2} {'type': 'loss', 'content': 0.06820549070835114, 'timestamp': '2025-10-01 04:27:06.389049', 'step': 11530, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:06.421236', 'step': 11530, 'epoch': 2} {'type': 'loss', 'content': 0.07156150788068771, 'timestamp': '2025-10-01 04:27:06.424151', 'step': 11531, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:06.456179', 'step': 11531, 'epoch': 2} {'type': 'loss', 'content': 0.09822166711091995, 'timestamp': '2025-10-01 04:27:06.481041', 'step': 11532, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:06.512314', 'step': 11532, 'epoch': 2} {'type': 'loss', 'content': 0.09846009314060211, 'timestamp': '2025-10-01 04:27:06.515242', 'step': 11533, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:27:06.547624', 'step': 11533, 'epoch': 2} {'type': 'loss', 'content': 0.05177729204297066, 'timestamp': '2025-10-01 04:27:06.550345', 'step': 11534, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:27:06.581744', 'step': 11534, 'epoch': 2} {'type': 'loss', 'content': 0.11586389690637589, 'timestamp': '2025-10-01 04:27:06.596565', 'step': 11535, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:27:06.627696', 'step': 11535, 'epoch': 2} {'type': 'loss', 'content': 0.13145415484905243, 'timestamp': '2025-10-01 04:27:06.651905', 'step': 11536, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:27:06.682918', 'step': 11536, 'epoch': 2} {'type': 'loss', 'content': 0.12788598239421844, 'timestamp': '2025-10-01 04:27:06.685903', 'step': 11537, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:06.723161', 'step': 11537, 'epoch': 2} {'type': 'loss', 'content': 0.07696134597063065, 'timestamp': '2025-10-01 04:27:06.725531', 'step': 11538, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:06.756191', 'step': 11538, 'epoch': 2} {'type': 'loss', 'content': 0.04590466618537903, 'timestamp': '2025-10-01 04:27:06.759142', 'step': 11539, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:06.790213', 'step': 11539, 'epoch': 2} {'type': 'loss', 'content': 0.16992411017417908, 'timestamp': '2025-10-01 04:27:06.823811', 'step': 11540, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:06.870309', 'step': 11540, 'epoch': 2} {'type': 'loss', 'content': 0.07773551344871521, 'timestamp': '2025-10-01 04:27:06.876387', 'step': 11541, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:06.912588', 'step': 11541, 'epoch': 2} {'type': 'loss', 'content': 0.13414175808429718, 'timestamp': '2025-10-01 04:27:06.924325', 'step': 11542, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:06.960210', 'step': 11542, 'epoch': 2} {'type': 'loss', 'content': 0.06784077733755112, 'timestamp': '2025-10-01 04:27:06.962747', 'step': 11543, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:27:07.014417', 'step': 11543, 'epoch': 2} {'type': 'loss', 'content': 0.09000810980796814, 'timestamp': '2025-10-01 04:27:07.039764', 'step': 11544, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:27:07.074090', 'step': 11544, 'epoch': 2} {'type': 'loss', 'content': 0.13801786303520203, 'timestamp': '2025-10-01 04:27:07.076847', 'step': 11545, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:07.127929', 'step': 11545, 'epoch': 2} {'type': 'loss', 'content': 0.09468696266412735, 'timestamp': '2025-10-01 04:27:07.130227', 'step': 11546, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:27:07.163566', 'step': 11546, 'epoch': 2} {'type': 'loss', 'content': 0.12576824426651, 'timestamp': '2025-10-01 04:27:07.167956', 'step': 11547, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:27:07.201344', 'step': 11547, 'epoch': 2} {'type': 'loss', 'content': 0.12224561721086502, 'timestamp': '2025-10-01 04:27:07.225377', 'step': 11548, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:07.266402', 'step': 11548, 'epoch': 2} {'type': 'loss', 'content': 0.0617704875767231, 'timestamp': '2025-10-01 04:27:07.269486', 'step': 11549, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:27:07.306255', 'step': 11549, 'epoch': 2} {'type': 'loss', 'content': 0.03791173920035362, 'timestamp': '2025-10-01 04:27:07.308508', 'step': 11550, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:27:07.340809', 'step': 11550, 'epoch': 2} {'type': 'loss', 'content': 0.05811014026403427, 'timestamp': '2025-10-01 04:27:07.343082', 'step': 11551, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:27:07.386098', 'step': 11551, 'epoch': 2} {'type': 'loss', 'content': 0.06812243163585663, 'timestamp': '2025-10-01 04:27:07.409875', 'step': 11552, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:27:07.444910', 'step': 11552, 'epoch': 2} {'type': 'loss', 'content': 0.07020466029644012, 'timestamp': '2025-10-01 04:27:07.446966', 'step': 11553, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:07.478538', 'step': 11553, 'epoch': 2} {'type': 'loss', 'content': 0.05577809736132622, 'timestamp': '2025-10-01 04:27:07.481060', 'step': 11554, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:27:07.523590', 'step': 11554, 'epoch': 2} {'type': 'loss', 'content': 0.09748797118663788, 'timestamp': '2025-10-01 04:27:07.525951', 'step': 11555, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:07.562314', 'step': 11555, 'epoch': 2} {'type': 'loss', 'content': 0.13940033316612244, 'timestamp': '2025-10-01 04:27:07.586312', 'step': 11556, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:07.620428', 'step': 11556, 'epoch': 2} {'type': 'loss', 'content': 0.07471872121095657, 'timestamp': '2025-10-01 04:27:07.622735', 'step': 11557, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:27:07.659822', 'step': 11557, 'epoch': 2} {'type': 'loss', 'content': 0.10923050343990326, 'timestamp': '2025-10-01 04:27:07.666537', 'step': 11558, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:07.697079', 'step': 11558, 'epoch': 2} {'type': 'loss', 'content': 0.15056511759757996, 'timestamp': '2025-10-01 04:27:07.699377', 'step': 11559, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:07.756858', 'step': 11559, 'epoch': 2} {'type': 'loss', 'content': 0.06578859686851501, 'timestamp': '2025-10-01 04:27:07.780830', 'step': 11560, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:07.813131', 'step': 11560, 'epoch': 2} {'type': 'loss', 'content': 0.06887025386095047, 'timestamp': '2025-10-01 04:27:07.815569', 'step': 11561, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:27:07.856689', 'step': 11561, 'epoch': 2} {'type': 'loss', 'content': 0.09510967880487442, 'timestamp': '2025-10-01 04:27:07.860063', 'step': 11562, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:07.891315', 'step': 11562, 'epoch': 2} {'type': 'loss', 'content': 0.1265704333782196, 'timestamp': '2025-10-01 04:27:07.894462', 'step': 11563, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:27:07.925782', 'step': 11563, 'epoch': 2} {'type': 'loss', 'content': 0.09309546649456024, 'timestamp': '2025-10-01 04:27:07.949633', 'step': 11564, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:27:07.983560', 'step': 11564, 'epoch': 2} {'type': 'loss', 'content': 0.05145380273461342, 'timestamp': '2025-10-01 04:27:07.986006', 'step': 11565, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:27:08.018028', 'step': 11565, 'epoch': 2} {'type': 'loss', 'content': 0.14133453369140625, 'timestamp': '2025-10-01 04:27:08.022098', 'step': 11566, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:27:08.056553', 'step': 11566, 'epoch': 2} {'type': 'loss', 'content': 0.15886999666690826, 'timestamp': '2025-10-01 04:27:08.058977', 'step': 11567, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:08.091369', 'step': 11567, 'epoch': 2} {'type': 'loss', 'content': 0.08340994268655777, 'timestamp': '2025-10-01 04:27:08.117302', 'step': 11568, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:27:08.149884', 'step': 11568, 'epoch': 2} {'type': 'loss', 'content': 0.10526535660028458, 'timestamp': '2025-10-01 04:27:08.153174', 'step': 11569, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:27:08.185805', 'step': 11569, 'epoch': 2} {'type': 'loss', 'content': 0.09914316236972809, 'timestamp': '2025-10-01 04:27:08.188561', 'step': 11570, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:27:08.219622', 'step': 11570, 'epoch': 2} {'type': 'loss', 'content': 0.1844811588525772, 'timestamp': '2025-10-01 04:27:08.221972', 'step': 11571, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:27:08.268107', 'step': 11571, 'epoch': 2} {'type': 'loss', 'content': 0.05994796007871628, 'timestamp': '2025-10-01 04:27:08.291837', 'step': 11572, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:08.327790', 'step': 11572, 'epoch': 2} {'type': 'loss', 'content': 0.0907864198088646, 'timestamp': '2025-10-01 04:27:08.330103', 'step': 11573, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:27:08.362952', 'step': 11573, 'epoch': 2} {'type': 'loss', 'content': 0.07550613582134247, 'timestamp': '2025-10-01 04:27:08.365884', 'step': 11574, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:27:08.398420', 'step': 11574, 'epoch': 2} {'type': 'loss', 'content': 0.14269201457500458, 'timestamp': '2025-10-01 04:27:08.404957', 'step': 11575, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:27:08.443156', 'step': 11575, 'epoch': 2} {'type': 'loss', 'content': 0.01353227999061346, 'timestamp': '2025-10-01 04:27:08.469769', 'step': 11576, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:27:08.507876', 'step': 11576, 'epoch': 2} {'type': 'loss', 'content': 0.11932004243135452, 'timestamp': '2025-10-01 04:27:08.510359', 'step': 11577, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:27:08.542996', 'step': 11577, 'epoch': 2} {'type': 'loss', 'content': 0.05982939153909683, 'timestamp': '2025-10-01 04:27:08.545499', 'step': 11578, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:08.582303', 'step': 11578, 'epoch': 2} {'type': 'loss', 'content': 0.1337587535381317, 'timestamp': '2025-10-01 04:27:08.597979', 'step': 11579, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:08.630808', 'step': 11579, 'epoch': 2} {'type': 'loss', 'content': 0.030185123905539513, 'timestamp': '2025-10-01 04:27:08.667704', 'step': 11580, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:08.707908', 'step': 11580, 'epoch': 2} {'type': 'loss', 'content': 0.15887540578842163, 'timestamp': '2025-10-01 04:27:08.710508', 'step': 11581, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:27:08.747267', 'step': 11581, 'epoch': 2} {'type': 'loss', 'content': 0.07988379150629044, 'timestamp': '2025-10-01 04:27:08.750381', 'step': 11582, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:08.782060', 'step': 11582, 'epoch': 2} {'type': 'loss', 'content': 0.09616673737764359, 'timestamp': '2025-10-01 04:27:08.795975', 'step': 11583, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:08.828147', 'step': 11583, 'epoch': 2} {'type': 'loss', 'content': 0.12353109568357468, 'timestamp': '2025-10-01 04:27:08.852200', 'step': 11584, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:27:08.884863', 'step': 11584, 'epoch': 2} {'type': 'loss', 'content': 0.1221369057893753, 'timestamp': '2025-10-01 04:27:08.889498', 'step': 11585, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:08.927262', 'step': 11585, 'epoch': 2} {'type': 'loss', 'content': 0.13662204146385193, 'timestamp': '2025-10-01 04:27:08.929466', 'step': 11586, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:27:08.963435', 'step': 11586, 'epoch': 2} {'type': 'loss', 'content': 0.07827986031770706, 'timestamp': '2025-10-01 04:27:08.965741', 'step': 11587, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:27:08.998639', 'step': 11587, 'epoch': 2} {'type': 'loss', 'content': 0.04753652215003967, 'timestamp': '2025-10-01 04:27:09.022438', 'step': 11588, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:27:09.053880', 'step': 11588, 'epoch': 2} {'type': 'loss', 'content': 0.037580184638500214, 'timestamp': '2025-10-01 04:27:09.056107', 'step': 11589, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:27:09.094897', 'step': 11589, 'epoch': 2} {'type': 'loss', 'content': 0.15442781150341034, 'timestamp': '2025-10-01 04:27:09.097282', 'step': 11590, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:09.129349', 'step': 11590, 'epoch': 2} {'type': 'loss', 'content': 0.12176084518432617, 'timestamp': '2025-10-01 04:27:09.131907', 'step': 11591, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:27:09.164913', 'step': 11591, 'epoch': 2} {'type': 'loss', 'content': 0.0892648920416832, 'timestamp': '2025-10-01 04:27:09.193927', 'step': 11592, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:27:09.226618', 'step': 11592, 'epoch': 2} {'type': 'loss', 'content': 0.12892985343933105, 'timestamp': '2025-10-01 04:27:09.229015', 'step': 11593, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:27:09.270508', 'step': 11593, 'epoch': 2} {'type': 'loss', 'content': 0.15557394921779633, 'timestamp': '2025-10-01 04:27:09.272915', 'step': 11594, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:09.312098', 'step': 11594, 'epoch': 2} {'type': 'loss', 'content': 0.0799373984336853, 'timestamp': '2025-10-01 04:27:09.331899', 'step': 11595, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:09.371437', 'step': 11595, 'epoch': 2} {'type': 'loss', 'content': 0.1704505831003189, 'timestamp': '2025-10-01 04:27:09.395234', 'step': 11596, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:09.441349', 'step': 11596, 'epoch': 2} {'type': 'loss', 'content': 0.10289938002824783, 'timestamp': '2025-10-01 04:27:09.443750', 'step': 11597, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:27:09.486750', 'step': 11597, 'epoch': 2} {'type': 'loss', 'content': 0.13440188765525818, 'timestamp': '2025-10-01 04:27:09.489210', 'step': 11598, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:27:09.522791', 'step': 11598, 'epoch': 2} {'type': 'loss', 'content': 0.14655756950378418, 'timestamp': '2025-10-01 04:27:09.525189', 'step': 11599, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:09.557524', 'step': 11599, 'epoch': 2} {'type': 'loss', 'content': 0.06343545764684677, 'timestamp': '2025-10-01 04:27:09.581307', 'step': 11600, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:27:09.614620', 'step': 11600, 'epoch': 2} {'type': 'loss', 'content': 0.04170217365026474, 'timestamp': '2025-10-01 04:27:09.617044', 'step': 11601, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:09.651119', 'step': 11601, 'epoch': 2} {'type': 'loss', 'content': 0.039313651621341705, 'timestamp': '2025-10-01 04:27:09.653445', 'step': 11602, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:27:09.701997', 'step': 11602, 'epoch': 2} {'type': 'loss', 'content': 0.08939049392938614, 'timestamp': '2025-10-01 04:27:09.705324', 'step': 11603, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:09.737199', 'step': 11603, 'epoch': 2} {'type': 'loss', 'content': 0.12525810301303864, 'timestamp': '2025-10-01 04:27:09.761125', 'step': 11604, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:27:09.800212', 'step': 11604, 'epoch': 2} {'type': 'loss', 'content': 0.0957522764801979, 'timestamp': '2025-10-01 04:27:09.802572', 'step': 11605, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:09.834111', 'step': 11605, 'epoch': 2} {'type': 'loss', 'content': 0.10326012223958969, 'timestamp': '2025-10-01 04:27:09.836814', 'step': 11606, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:27:09.870778', 'step': 11606, 'epoch': 2} {'type': 'loss', 'content': 0.10084651410579681, 'timestamp': '2025-10-01 04:27:09.873718', 'step': 11607, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:09.908029', 'step': 11607, 'epoch': 2} {'type': 'loss', 'content': 0.06308375298976898, 'timestamp': '2025-10-01 04:27:09.931832', 'step': 11608, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:27:09.964395', 'step': 11608, 'epoch': 2} {'type': 'loss', 'content': 0.07542075216770172, 'timestamp': '2025-10-01 04:27:09.966812', 'step': 11609, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:27:09.999247', 'step': 11609, 'epoch': 2} {'type': 'loss', 'content': 0.08376254886388779, 'timestamp': '2025-10-01 04:27:10.001576', 'step': 11610, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:27:10.035362', 'step': 11610, 'epoch': 2} {'type': 'loss', 'content': 0.12265554815530777, 'timestamp': '2025-10-01 04:27:10.037699', 'step': 11611, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:27:10.075378', 'step': 11611, 'epoch': 2} {'type': 'loss', 'content': 0.06623594462871552, 'timestamp': '2025-10-01 04:27:10.099248', 'step': 11612, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:27:10.131989', 'step': 11612, 'epoch': 2} {'type': 'loss', 'content': 0.11565686762332916, 'timestamp': '2025-10-01 04:27:10.134472', 'step': 11613, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:27:10.166792', 'step': 11613, 'epoch': 2} {'type': 'loss', 'content': 0.12097957730293274, 'timestamp': '2025-10-01 04:27:10.176778', 'step': 11614, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:10.209367', 'step': 11614, 'epoch': 2} {'type': 'loss', 'content': 0.039196863770484924, 'timestamp': '2025-10-01 04:27:10.211759', 'step': 11615, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:10.254316', 'step': 11615, 'epoch': 2} {'type': 'loss', 'content': 0.07737469673156738, 'timestamp': '2025-10-01 04:27:10.278499', 'step': 11616, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:27:10.312463', 'step': 11616, 'epoch': 2} {'type': 'loss', 'content': 0.16176708042621613, 'timestamp': '2025-10-01 04:27:10.314845', 'step': 11617, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:27:10.349018', 'step': 11617, 'epoch': 2} {'type': 'loss', 'content': 0.08684753626585007, 'timestamp': '2025-10-01 04:27:10.351894', 'step': 11618, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:10.390354', 'step': 11618, 'epoch': 2} {'type': 'loss', 'content': 0.11385522782802582, 'timestamp': '2025-10-01 04:27:10.393066', 'step': 11619, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:10.426616', 'step': 11619, 'epoch': 2} {'type': 'loss', 'content': 0.1482885479927063, 'timestamp': '2025-10-01 04:27:10.451257', 'step': 11620, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:10.491114', 'step': 11620, 'epoch': 2} {'type': 'loss', 'content': 0.06785809248685837, 'timestamp': '2025-10-01 04:27:10.504577', 'step': 11621, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:27:10.537558', 'step': 11621, 'epoch': 2} {'type': 'loss', 'content': 0.13674788177013397, 'timestamp': '2025-10-01 04:27:10.540528', 'step': 11622, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:10.573685', 'step': 11622, 'epoch': 2} {'type': 'loss', 'content': 0.14477330446243286, 'timestamp': '2025-10-01 04:27:10.577750', 'step': 11623, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:27:10.611765', 'step': 11623, 'epoch': 2} {'type': 'loss', 'content': 0.024835852906107903, 'timestamp': '2025-10-01 04:27:10.636064', 'step': 11624, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:10.675655', 'step': 11624, 'epoch': 2} {'type': 'loss', 'content': 0.10490741580724716, 'timestamp': '2025-10-01 04:27:10.678948', 'step': 11625, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:10.711892', 'step': 11625, 'epoch': 2} {'type': 'loss', 'content': 0.1287938952445984, 'timestamp': '2025-10-01 04:27:10.718074', 'step': 11626, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:10.750729', 'step': 11626, 'epoch': 2} {'type': 'loss', 'content': 0.1001235842704773, 'timestamp': '2025-10-01 04:27:10.753310', 'step': 11627, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:10.792852', 'step': 11627, 'epoch': 2} {'type': 'loss', 'content': 0.048991184681653976, 'timestamp': '2025-10-01 04:27:10.816712', 'step': 11628, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:27:10.855303', 'step': 11628, 'epoch': 2} {'type': 'loss', 'content': 0.050276979804039, 'timestamp': '2025-10-01 04:27:10.857648', 'step': 11629, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:27:10.888201', 'step': 11629, 'epoch': 2} {'type': 'loss', 'content': 0.08163054287433624, 'timestamp': '2025-10-01 04:27:10.890641', 'step': 11630, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:27:10.926843', 'step': 11630, 'epoch': 2} {'type': 'loss', 'content': 0.027795452624559402, 'timestamp': '2025-10-01 04:27:10.929756', 'step': 11631, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:27:10.960663', 'step': 11631, 'epoch': 2} {'type': 'loss', 'content': 0.11431080102920532, 'timestamp': '2025-10-01 04:27:10.989684', 'step': 11632, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:27:11.025977', 'step': 11632, 'epoch': 2} {'type': 'loss', 'content': 0.12003985047340393, 'timestamp': '2025-10-01 04:27:11.028369', 'step': 11633, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:27:11.061035', 'step': 11633, 'epoch': 2} {'type': 'loss', 'content': 0.07000477612018585, 'timestamp': '2025-10-01 04:27:11.063402', 'step': 11634, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:11.096904', 'step': 11634, 'epoch': 2} {'type': 'loss', 'content': 0.07216568291187286, 'timestamp': '2025-10-01 04:27:11.099293', 'step': 11635, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:27:11.133018', 'step': 11635, 'epoch': 2} {'type': 'loss', 'content': 0.06691427528858185, 'timestamp': '2025-10-01 04:27:11.156785', 'step': 11636, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:11.190867', 'step': 11636, 'epoch': 2} {'type': 'loss', 'content': 0.09313538670539856, 'timestamp': '2025-10-01 04:27:11.193292', 'step': 11637, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:11.232225', 'step': 11637, 'epoch': 2} {'type': 'loss', 'content': 0.18746936321258545, 'timestamp': '2025-10-01 04:27:11.234795', 'step': 11638, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:27:11.274627', 'step': 11638, 'epoch': 2} {'type': 'loss', 'content': 0.1351473182439804, 'timestamp': '2025-10-01 04:27:11.277429', 'step': 11639, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:27:11.315599', 'step': 11639, 'epoch': 2} {'type': 'loss', 'content': 0.10857787728309631, 'timestamp': '2025-10-01 04:27:11.339588', 'step': 11640, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:11.373666', 'step': 11640, 'epoch': 2} {'type': 'loss', 'content': 0.03283770754933357, 'timestamp': '2025-10-01 04:27:11.375787', 'step': 11641, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:11.428213', 'step': 11641, 'epoch': 2} {'type': 'loss', 'content': 0.08067360520362854, 'timestamp': '2025-10-01 04:27:11.430629', 'step': 11642, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:11.473684', 'step': 11642, 'epoch': 2} {'type': 'loss', 'content': 0.15742002427577972, 'timestamp': '2025-10-01 04:27:11.475954', 'step': 11643, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:27:11.507627', 'step': 11643, 'epoch': 2} {'type': 'loss', 'content': 0.08331116288900375, 'timestamp': '2025-10-01 04:27:11.532462', 'step': 11644, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:27:11.565210', 'step': 11644, 'epoch': 2} {'type': 'loss', 'content': 0.11240626871585846, 'timestamp': '2025-10-01 04:27:11.567560', 'step': 11645, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:27:11.606701', 'step': 11645, 'epoch': 2} {'type': 'loss', 'content': 0.09613726288080215, 'timestamp': '2025-10-01 04:27:11.609010', 'step': 11646, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:11.640859', 'step': 11646, 'epoch': 2} {'type': 'loss', 'content': 0.061781320720911026, 'timestamp': '2025-10-01 04:27:11.653934', 'step': 11647, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:11.686483', 'step': 11647, 'epoch': 2} {'type': 'loss', 'content': 0.07753213495016098, 'timestamp': '2025-10-01 04:27:11.710865', 'step': 11648, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:27:11.742422', 'step': 11648, 'epoch': 2} {'type': 'loss', 'content': 0.03280168026685715, 'timestamp': '2025-10-01 04:27:11.745384', 'step': 11649, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:27:11.779505', 'step': 11649, 'epoch': 2} {'type': 'loss', 'content': 0.1281961053609848, 'timestamp': '2025-10-01 04:27:11.781944', 'step': 11650, 'epoch': 2} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 949202279808}], 'timestamp': '2025-10-01 04:27:22.380749', 'step': 11650, 'epoch': 2} {'type': 'pplx', 'content': 12260.180466864022, 'timestamp': '2025-10-01 04:27:22.384294', 'step': 11650, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:22.415385', 'step': 11650, 'epoch': 2} {'type': 'loss', 'content': 0.0854533389210701, 'timestamp': '2025-10-01 04:27:22.417719', 'step': 11651, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:22.459608', 'step': 11651, 'epoch': 2} {'type': 'loss', 'content': 0.11829139292240143, 'timestamp': '2025-10-01 04:27:22.483615', 'step': 11652, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:27:22.516768', 'step': 11652, 'epoch': 2} {'type': 'loss', 'content': 0.10397271811962128, 'timestamp': '2025-10-01 04:27:22.519458', 'step': 11653, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:27:22.553474', 'step': 11653, 'epoch': 2} {'type': 'loss', 'content': 0.17052245140075684, 'timestamp': '2025-10-01 04:27:22.556442', 'step': 11654, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:27:22.597811', 'step': 11654, 'epoch': 2} {'type': 'loss', 'content': 0.08752274513244629, 'timestamp': '2025-10-01 04:27:22.600603', 'step': 11655, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:27:22.650921', 'step': 11655, 'epoch': 2} {'type': 'loss', 'content': 0.08091197907924652, 'timestamp': '2025-10-01 04:27:22.674796', 'step': 11656, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:22.707145', 'step': 11656, 'epoch': 2} {'type': 'loss', 'content': 0.10271409153938293, 'timestamp': '2025-10-01 04:27:22.710066', 'step': 11657, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:22.741586', 'step': 11657, 'epoch': 2} {'type': 'loss', 'content': 0.0731293186545372, 'timestamp': '2025-10-01 04:27:22.744587', 'step': 11658, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:27:22.776615', 'step': 11658, 'epoch': 2} {'type': 'loss', 'content': 0.07481618225574493, 'timestamp': '2025-10-01 04:27:22.779135', 'step': 11659, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:22.816035', 'step': 11659, 'epoch': 2} {'type': 'loss', 'content': 0.09997780621051788, 'timestamp': '2025-10-01 04:27:22.840250', 'step': 11660, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:22.872895', 'step': 11660, 'epoch': 2} {'type': 'loss', 'content': 0.08444923907518387, 'timestamp': '2025-10-01 04:27:22.875944', 'step': 11661, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:22.912797', 'step': 11661, 'epoch': 2} {'type': 'loss', 'content': 0.18362760543823242, 'timestamp': '2025-10-01 04:27:22.915466', 'step': 11662, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:22.950714', 'step': 11662, 'epoch': 2} {'type': 'loss', 'content': 0.10699683427810669, 'timestamp': '2025-10-01 04:27:22.953460', 'step': 11663, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:27:22.987215', 'step': 11663, 'epoch': 2} {'type': 'loss', 'content': 0.15639254450798035, 'timestamp': '2025-10-01 04:27:23.011376', 'step': 11664, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:27:23.058747', 'step': 11664, 'epoch': 2} {'type': 'loss', 'content': 0.055748820304870605, 'timestamp': '2025-10-01 04:27:23.061275', 'step': 11665, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:27:23.093959', 'step': 11665, 'epoch': 2} {'type': 'loss', 'content': 0.12382133305072784, 'timestamp': '2025-10-01 04:27:23.096420', 'step': 11666, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:27:23.158803', 'step': 11666, 'epoch': 2} {'type': 'loss', 'content': 0.07921920716762543, 'timestamp': '2025-10-01 04:27:23.161294', 'step': 11667, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:27:23.193719', 'step': 11667, 'epoch': 2} {'type': 'loss', 'content': 0.0744129940867424, 'timestamp': '2025-10-01 04:27:23.217460', 'step': 11668, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:23.255429', 'step': 11668, 'epoch': 2} {'type': 'loss', 'content': 0.1494561731815338, 'timestamp': '2025-10-01 04:27:23.257760', 'step': 11669, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:23.297511', 'step': 11669, 'epoch': 2} {'type': 'loss', 'content': 0.12769795954227448, 'timestamp': '2025-10-01 04:27:23.300034', 'step': 11670, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:27:23.352465', 'step': 11670, 'epoch': 2} {'type': 'loss', 'content': 0.06040363386273384, 'timestamp': '2025-10-01 04:27:23.355073', 'step': 11671, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:27:23.396259', 'step': 11671, 'epoch': 2} {'type': 'loss', 'content': 0.2404770702123642, 'timestamp': '2025-10-01 04:27:23.421423', 'step': 11672, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:27:23.472214', 'step': 11672, 'epoch': 2} {'type': 'loss', 'content': 0.059583596885204315, 'timestamp': '2025-10-01 04:27:23.476681', 'step': 11673, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:23.523040', 'step': 11673, 'epoch': 2} {'type': 'loss', 'content': 0.051139745861291885, 'timestamp': '2025-10-01 04:27:23.525594', 'step': 11674, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:27:23.596259', 'step': 11674, 'epoch': 2} {'type': 'loss', 'content': 0.15372265875339508, 'timestamp': '2025-10-01 04:27:23.598654', 'step': 11675, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:23.651645', 'step': 11675, 'epoch': 2} {'type': 'loss', 'content': 0.13424153625965118, 'timestamp': '2025-10-01 04:27:23.675360', 'step': 11676, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:27:23.711071', 'step': 11676, 'epoch': 2} {'type': 'loss', 'content': 0.11860082298517227, 'timestamp': '2025-10-01 04:27:23.713274', 'step': 11677, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:23.748978', 'step': 11677, 'epoch': 2} {'type': 'loss', 'content': 0.09746133536100388, 'timestamp': '2025-10-01 04:27:23.754006', 'step': 11678, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:27:23.790076', 'step': 11678, 'epoch': 2} {'type': 'loss', 'content': 0.12075289338827133, 'timestamp': '2025-10-01 04:27:23.792250', 'step': 11679, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:27:23.828881', 'step': 11679, 'epoch': 2} {'type': 'loss', 'content': 0.09927087277173996, 'timestamp': '2025-10-01 04:27:23.856865', 'step': 11680, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:23.892616', 'step': 11680, 'epoch': 2} {'type': 'loss', 'content': 0.06323260813951492, 'timestamp': '2025-10-01 04:27:23.894805', 'step': 11681, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:27:23.927576', 'step': 11681, 'epoch': 2} {'type': 'loss', 'content': 0.055565230548381805, 'timestamp': '2025-10-01 04:27:23.929803', 'step': 11682, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:23.973508', 'step': 11682, 'epoch': 2} {'type': 'loss', 'content': 0.14217609167099, 'timestamp': '2025-10-01 04:27:23.975709', 'step': 11683, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:24.022710', 'step': 11683, 'epoch': 2} {'type': 'loss', 'content': 0.14092570543289185, 'timestamp': '2025-10-01 04:27:24.046404', 'step': 11684, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:24.079301', 'step': 11684, 'epoch': 2} {'type': 'loss', 'content': 0.07828067243099213, 'timestamp': '2025-10-01 04:27:24.081619', 'step': 11685, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:27:24.118406', 'step': 11685, 'epoch': 2} {'type': 'loss', 'content': 0.11435258388519287, 'timestamp': '2025-10-01 04:27:24.121506', 'step': 11686, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:27:24.157355', 'step': 11686, 'epoch': 2} {'type': 'loss', 'content': 0.1738470494747162, 'timestamp': '2025-10-01 04:27:24.159645', 'step': 11687, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:24.199284', 'step': 11687, 'epoch': 2} {'type': 'loss', 'content': 0.14839260280132294, 'timestamp': '2025-10-01 04:27:24.222869', 'step': 11688, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:27:24.255055', 'step': 11688, 'epoch': 2} {'type': 'loss', 'content': 0.06109706684947014, 'timestamp': '2025-10-01 04:27:24.257231', 'step': 11689, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:27:24.291243', 'step': 11689, 'epoch': 2} {'type': 'loss', 'content': 0.06475604325532913, 'timestamp': '2025-10-01 04:27:24.293392', 'step': 11690, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:27:24.331621', 'step': 11690, 'epoch': 2} {'type': 'loss', 'content': 0.052419956773519516, 'timestamp': '2025-10-01 04:27:24.333741', 'step': 11691, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:24.365016', 'step': 11691, 'epoch': 2} {'type': 'loss', 'content': 0.1344263106584549, 'timestamp': '2025-10-01 04:27:24.389264', 'step': 11692, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:27:24.421053', 'step': 11692, 'epoch': 2} {'type': 'loss', 'content': 0.1526445895433426, 'timestamp': '2025-10-01 04:27:24.423372', 'step': 11693, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:24.457934', 'step': 11693, 'epoch': 2} {'type': 'loss', 'content': 0.16051968932151794, 'timestamp': '2025-10-01 04:27:24.460131', 'step': 11694, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:27:24.502299', 'step': 11694, 'epoch': 2} {'type': 'loss', 'content': 0.2390751838684082, 'timestamp': '2025-10-01 04:27:24.504503', 'step': 11695, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:27:24.536597', 'step': 11695, 'epoch': 2} {'type': 'loss', 'content': 0.19918738305568695, 'timestamp': '2025-10-01 04:27:24.560356', 'step': 11696, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:27:24.590643', 'step': 11696, 'epoch': 2} {'type': 'loss', 'content': 0.09732107818126678, 'timestamp': '2025-10-01 04:27:24.594205', 'step': 11697, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:27:24.628804', 'step': 11697, 'epoch': 2} {'type': 'loss', 'content': 0.0942491665482521, 'timestamp': '2025-10-01 04:27:24.631420', 'step': 11698, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:27:24.666250', 'step': 11698, 'epoch': 2} {'type': 'loss', 'content': 0.04926091805100441, 'timestamp': '2025-10-01 04:27:24.668361', 'step': 11699, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:27:24.702987', 'step': 11699, 'epoch': 2} {'type': 'loss', 'content': 0.017627302557229996, 'timestamp': '2025-10-01 04:27:24.726818', 'step': 11700, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:27:24.759342', 'step': 11700, 'epoch': 2} {'type': 'loss', 'content': 0.05689296871423721, 'timestamp': '2025-10-01 04:27:24.761584', 'step': 11701, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:24.793328', 'step': 11701, 'epoch': 2} {'type': 'loss', 'content': 0.05162768438458443, 'timestamp': '2025-10-01 04:27:24.795810', 'step': 11702, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:27:24.829644', 'step': 11702, 'epoch': 2} {'type': 'loss', 'content': 0.13861948251724243, 'timestamp': '2025-10-01 04:27:24.832095', 'step': 11703, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:27:24.869330', 'step': 11703, 'epoch': 2} {'type': 'loss', 'content': 0.11458087712526321, 'timestamp': '2025-10-01 04:27:24.893095', 'step': 11704, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:27:24.934803', 'step': 11704, 'epoch': 2} {'type': 'loss', 'content': 0.06292326748371124, 'timestamp': '2025-10-01 04:27:24.937923', 'step': 11705, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:27:24.975499', 'step': 11705, 'epoch': 2} {'type': 'loss', 'content': 0.09438573569059372, 'timestamp': '2025-10-01 04:27:24.977946', 'step': 11706, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:27:25.035932', 'step': 11706, 'epoch': 2} {'type': 'loss', 'content': 0.17948299646377563, 'timestamp': '2025-10-01 04:27:25.038122', 'step': 11707, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:27:25.076019', 'step': 11707, 'epoch': 2} {'type': 'loss', 'content': 0.12867793440818787, 'timestamp': '2025-10-01 04:27:25.100082', 'step': 11708, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:27:25.151810', 'step': 11708, 'epoch': 2} {'type': 'loss', 'content': 0.1716066598892212, 'timestamp': '2025-10-01 04:27:25.153994', 'step': 11709, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:27:25.206587', 'step': 11709, 'epoch': 2} {'type': 'loss', 'content': 0.15511251986026764, 'timestamp': '2025-10-01 04:27:25.209061', 'step': 11710, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:27:25.244910', 'step': 11710, 'epoch': 2} {'type': 'loss', 'content': 0.10340630263090134, 'timestamp': '2025-10-01 04:27:25.247224', 'step': 11711, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:25.285204', 'step': 11711, 'epoch': 2} {'type': 'loss', 'content': 0.058996886014938354, 'timestamp': '2025-10-01 04:27:25.308980', 'step': 11712, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:27:25.347319', 'step': 11712, 'epoch': 2} {'type': 'loss', 'content': 0.10790885984897614, 'timestamp': '2025-10-01 04:27:25.362055', 'step': 11713, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:25.412120', 'step': 11713, 'epoch': 2} {'type': 'loss', 'content': 0.15771500766277313, 'timestamp': '2025-10-01 04:27:25.414776', 'step': 11714, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:27:25.453804', 'step': 11714, 'epoch': 2} {'type': 'loss', 'content': 0.11537209153175354, 'timestamp': '2025-10-01 04:27:25.459286', 'step': 11715, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:27:25.496880', 'step': 11715, 'epoch': 2} {'type': 'loss', 'content': 0.06814311444759369, 'timestamp': '2025-10-01 04:27:25.521998', 'step': 11716, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:27:25.557791', 'step': 11716, 'epoch': 2} {'type': 'loss', 'content': 0.1834816187620163, 'timestamp': '2025-10-01 04:27:25.559971', 'step': 11717, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:27:25.596282', 'step': 11717, 'epoch': 2} {'type': 'loss', 'content': 0.20269151031970978, 'timestamp': '2025-10-01 04:27:25.598449', 'step': 11718, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:27:25.643227', 'step': 11718, 'epoch': 2} {'type': 'loss', 'content': 0.0993567705154419, 'timestamp': '2025-10-01 04:27:25.645429', 'step': 11719, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:27:25.693338', 'step': 11719, 'epoch': 2} {'type': 'loss', 'content': 0.08157699555158615, 'timestamp': '2025-10-01 04:27:25.717374', 'step': 11720, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:27:25.755222', 'step': 11720, 'epoch': 2} {'type': 'loss', 'content': 0.1655239313840866, 'timestamp': '2025-10-01 04:27:25.757528', 'step': 11721, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:27:25.792945', 'step': 11721, 'epoch': 2} {'type': 'loss', 'content': 0.08730713278055191, 'timestamp': '2025-10-01 04:27:25.795252', 'step': 11722, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:25.846615', 'step': 11722, 'epoch': 2} {'type': 'loss', 'content': 0.11865700781345367, 'timestamp': '2025-10-01 04:27:25.849233', 'step': 11723, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:27:25.897969', 'step': 11723, 'epoch': 2} {'type': 'loss', 'content': 0.06328597664833069, 'timestamp': '2025-10-01 04:27:25.921988', 'step': 11724, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:27:25.970101', 'step': 11724, 'epoch': 2} {'type': 'loss', 'content': 0.08651690930128098, 'timestamp': '2025-10-01 04:27:25.972212', 'step': 11725, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:26.017806', 'step': 11725, 'epoch': 2} {'type': 'loss', 'content': 0.19057884812355042, 'timestamp': '2025-10-01 04:27:26.020432', 'step': 11726, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:27:26.061351', 'step': 11726, 'epoch': 2} {'type': 'loss', 'content': 0.08300533145666122, 'timestamp': '2025-10-01 04:27:26.064180', 'step': 11727, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:27:26.097211', 'step': 11727, 'epoch': 2} {'type': 'loss', 'content': 0.12040621787309647, 'timestamp': '2025-10-01 04:27:26.121042', 'step': 11728, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:27:26.155870', 'step': 11728, 'epoch': 2} {'type': 'loss', 'content': 0.1477387547492981, 'timestamp': '2025-10-01 04:27:26.158336', 'step': 11729, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:27:26.191996', 'step': 11729, 'epoch': 2} {'type': 'loss', 'content': 0.051304835826158524, 'timestamp': '2025-10-01 04:27:26.195300', 'step': 11730, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:26.229275', 'step': 11730, 'epoch': 2} {'type': 'loss', 'content': 0.07041792571544647, 'timestamp': '2025-10-01 04:27:26.231523', 'step': 11731, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:27:26.265060', 'step': 11731, 'epoch': 2} {'type': 'loss', 'content': 0.14454719424247742, 'timestamp': '2025-10-01 04:27:26.289202', 'step': 11732, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:26.340072', 'step': 11732, 'epoch': 2} {'type': 'loss', 'content': 0.12219109386205673, 'timestamp': '2025-10-01 04:27:26.342210', 'step': 11733, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:27:26.377496', 'step': 11733, 'epoch': 2} {'type': 'loss', 'content': 0.17879191040992737, 'timestamp': '2025-10-01 04:27:26.380081', 'step': 11734, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:27:26.416528', 'step': 11734, 'epoch': 2} {'type': 'loss', 'content': 0.11506368219852448, 'timestamp': '2025-10-01 04:27:26.418916', 'step': 11735, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:27:26.452520', 'step': 11735, 'epoch': 2} {'type': 'loss', 'content': 0.08197332173585892, 'timestamp': '2025-10-01 04:27:26.476295', 'step': 11736, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:26.518784', 'step': 11736, 'epoch': 2} {'type': 'loss', 'content': 0.0803983137011528, 'timestamp': '2025-10-01 04:27:26.520973', 'step': 11737, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:27:26.554923', 'step': 11737, 'epoch': 2} {'type': 'loss', 'content': 0.0455886572599411, 'timestamp': '2025-10-01 04:27:26.557497', 'step': 11738, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:27:26.589805', 'step': 11738, 'epoch': 2} {'type': 'loss', 'content': 0.1263483762741089, 'timestamp': '2025-10-01 04:27:26.592184', 'step': 11739, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:27:26.635821', 'step': 11739, 'epoch': 2} {'type': 'loss', 'content': 0.18378178775310516, 'timestamp': '2025-10-01 04:27:26.663987', 'step': 11740, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:26.710042', 'step': 11740, 'epoch': 2} {'type': 'loss', 'content': 0.10381446778774261, 'timestamp': '2025-10-01 04:27:26.712222', 'step': 11741, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:27:26.744376', 'step': 11741, 'epoch': 2} {'type': 'loss', 'content': 0.0707785114645958, 'timestamp': '2025-10-01 04:27:26.747462', 'step': 11742, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:27:26.779966', 'step': 11742, 'epoch': 2} {'type': 'loss', 'content': 0.09744612127542496, 'timestamp': '2025-10-01 04:27:26.782386', 'step': 11743, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:27:26.818085', 'step': 11743, 'epoch': 2} {'type': 'loss', 'content': 0.12893956899642944, 'timestamp': '2025-10-01 04:27:26.841939', 'step': 11744, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:26.874808', 'step': 11744, 'epoch': 2} {'type': 'loss', 'content': 0.09709784388542175, 'timestamp': '2025-10-01 04:27:26.876899', 'step': 11745, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:26.941930', 'step': 11745, 'epoch': 2} {'type': 'loss', 'content': 0.10067027062177658, 'timestamp': '2025-10-01 04:27:26.944103', 'step': 11746, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:27:26.985536', 'step': 11746, 'epoch': 2} {'type': 'loss', 'content': 0.14308466017246246, 'timestamp': '2025-10-01 04:27:26.987749', 'step': 11747, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:27.040656', 'step': 11747, 'epoch': 2} {'type': 'loss', 'content': 0.07781490683555603, 'timestamp': '2025-10-01 04:27:27.064409', 'step': 11748, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:27:27.098446', 'step': 11748, 'epoch': 2} {'type': 'loss', 'content': 0.09631720930337906, 'timestamp': '2025-10-01 04:27:27.100708', 'step': 11749, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:27.150121', 'step': 11749, 'epoch': 2} {'type': 'loss', 'content': 0.2111339122056961, 'timestamp': '2025-10-01 04:27:27.152318', 'step': 11750, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:27:27.184800', 'step': 11750, 'epoch': 2} {'type': 'loss', 'content': 0.11533927917480469, 'timestamp': '2025-10-01 04:27:27.187009', 'step': 11751, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:27.233609', 'step': 11751, 'epoch': 2} {'type': 'loss', 'content': 0.05881476402282715, 'timestamp': '2025-10-01 04:27:27.257385', 'step': 11752, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:27:27.289235', 'step': 11752, 'epoch': 2} {'type': 'loss', 'content': 0.2034703642129898, 'timestamp': '2025-10-01 04:27:27.291512', 'step': 11753, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:27:27.325343', 'step': 11753, 'epoch': 2} {'type': 'loss', 'content': 0.07499515265226364, 'timestamp': '2025-10-01 04:27:27.327816', 'step': 11754, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:27:27.368848', 'step': 11754, 'epoch': 2} {'type': 'loss', 'content': 0.09071095287799835, 'timestamp': '2025-10-01 04:27:27.371602', 'step': 11755, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:27:27.415019', 'step': 11755, 'epoch': 2} {'type': 'loss', 'content': 0.15127992630004883, 'timestamp': '2025-10-01 04:27:27.438960', 'step': 11756, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:27.481847', 'step': 11756, 'epoch': 2} {'type': 'loss', 'content': 0.16897469758987427, 'timestamp': '2025-10-01 04:27:27.484074', 'step': 11757, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:27:27.517247', 'step': 11757, 'epoch': 2} {'type': 'loss', 'content': 0.08185137063264847, 'timestamp': '2025-10-01 04:27:27.519590', 'step': 11758, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:27.559849', 'step': 11758, 'epoch': 2} {'type': 'loss', 'content': 0.18363645672798157, 'timestamp': '2025-10-01 04:27:27.562200', 'step': 11759, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:27.598010', 'step': 11759, 'epoch': 2} {'type': 'loss', 'content': 0.16242195665836334, 'timestamp': '2025-10-01 04:27:27.621831', 'step': 11760, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:27:27.657860', 'step': 11760, 'epoch': 2} {'type': 'loss', 'content': 0.1438436061143875, 'timestamp': '2025-10-01 04:27:27.660064', 'step': 11761, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:27:27.695274', 'step': 11761, 'epoch': 2} {'type': 'loss', 'content': 0.0562291145324707, 'timestamp': '2025-10-01 04:27:27.697689', 'step': 11762, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:27.744247', 'step': 11762, 'epoch': 2} {'type': 'loss', 'content': 0.10857755690813065, 'timestamp': '2025-10-01 04:27:27.746392', 'step': 11763, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:27.781442', 'step': 11763, 'epoch': 2} {'type': 'loss', 'content': 0.09509412199258804, 'timestamp': '2025-10-01 04:27:27.806238', 'step': 11764, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:27.851451', 'step': 11764, 'epoch': 2} {'type': 'loss', 'content': 0.10950034856796265, 'timestamp': '2025-10-01 04:27:27.853616', 'step': 11765, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:27:27.898544', 'step': 11765, 'epoch': 2} {'type': 'loss', 'content': 0.05413169786334038, 'timestamp': '2025-10-01 04:27:27.900860', 'step': 11766, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:27:27.933783', 'step': 11766, 'epoch': 2} {'type': 'loss', 'content': 0.08680357784032822, 'timestamp': '2025-10-01 04:27:27.946350', 'step': 11767, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:27:27.982629', 'step': 11767, 'epoch': 2} {'type': 'loss', 'content': 0.17559772729873657, 'timestamp': '2025-10-01 04:27:28.020135', 'step': 11768, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:28.052761', 'step': 11768, 'epoch': 2} {'type': 'loss', 'content': 0.12972159683704376, 'timestamp': '2025-10-01 04:27:28.054959', 'step': 11769, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:27:28.089507', 'step': 11769, 'epoch': 2} {'type': 'loss', 'content': 0.14492051303386688, 'timestamp': '2025-10-01 04:27:28.091760', 'step': 11770, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:27:28.161861', 'step': 11770, 'epoch': 2} {'type': 'loss', 'content': 0.02177642658352852, 'timestamp': '2025-10-01 04:27:28.164055', 'step': 11771, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:28.203340', 'step': 11771, 'epoch': 2} {'type': 'loss', 'content': 0.15134355425834656, 'timestamp': '2025-10-01 04:27:28.227259', 'step': 11772, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:28.260311', 'step': 11772, 'epoch': 2} {'type': 'loss', 'content': 0.05813022330403328, 'timestamp': '2025-10-01 04:27:28.262609', 'step': 11773, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:28.305075', 'step': 11773, 'epoch': 2} {'type': 'loss', 'content': 0.14014138281345367, 'timestamp': '2025-10-01 04:27:28.307180', 'step': 11774, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:27:28.340964', 'step': 11774, 'epoch': 2} {'type': 'loss', 'content': 0.14560456573963165, 'timestamp': '2025-10-01 04:27:28.343640', 'step': 11775, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:27:28.383466', 'step': 11775, 'epoch': 2} {'type': 'loss', 'content': 0.08215660601854324, 'timestamp': '2025-10-01 04:27:28.407122', 'step': 11776, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:27:28.448005', 'step': 11776, 'epoch': 2} {'type': 'loss', 'content': 0.22179703414440155, 'timestamp': '2025-10-01 04:27:28.450180', 'step': 11777, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:27:28.482692', 'step': 11777, 'epoch': 2} {'type': 'loss', 'content': 0.06998367607593536, 'timestamp': '2025-10-01 04:27:28.484836', 'step': 11778, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:28.523895', 'step': 11778, 'epoch': 2} {'type': 'loss', 'content': 0.09640335291624069, 'timestamp': '2025-10-01 04:27:28.527841', 'step': 11779, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:27:28.560121', 'step': 11779, 'epoch': 2} {'type': 'loss', 'content': 0.14142563939094543, 'timestamp': '2025-10-01 04:27:28.583901', 'step': 11780, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:27:28.629707', 'step': 11780, 'epoch': 2} {'type': 'loss', 'content': 0.10619083046913147, 'timestamp': '2025-10-01 04:27:28.631869', 'step': 11781, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:27:28.665603', 'step': 11781, 'epoch': 2} {'type': 'loss', 'content': 0.11723791062831879, 'timestamp': '2025-10-01 04:27:28.667771', 'step': 11782, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:28.700950', 'step': 11782, 'epoch': 2} {'type': 'loss', 'content': 0.1374681442975998, 'timestamp': '2025-10-01 04:27:28.703168', 'step': 11783, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:27:28.738119', 'step': 11783, 'epoch': 2} {'type': 'loss', 'content': 0.12075865268707275, 'timestamp': '2025-10-01 04:27:28.761940', 'step': 11784, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:27:28.812714', 'step': 11784, 'epoch': 2} {'type': 'loss', 'content': 0.07481425255537033, 'timestamp': '2025-10-01 04:27:28.814896', 'step': 11785, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:28.851243', 'step': 11785, 'epoch': 2} {'type': 'loss', 'content': 0.11697860807180405, 'timestamp': '2025-10-01 04:27:28.853939', 'step': 11786, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:27:28.904194', 'step': 11786, 'epoch': 2} {'type': 'loss', 'content': 0.12975353002548218, 'timestamp': '2025-10-01 04:27:28.906501', 'step': 11787, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:28.940237', 'step': 11787, 'epoch': 2} {'type': 'loss', 'content': 0.07144332677125931, 'timestamp': '2025-10-01 04:27:28.964386', 'step': 11788, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:28.999309', 'step': 11788, 'epoch': 2} {'type': 'loss', 'content': 0.10900875180959702, 'timestamp': '2025-10-01 04:27:29.001512', 'step': 11789, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:27:29.037382', 'step': 11789, 'epoch': 2} {'type': 'loss', 'content': 0.17732197046279907, 'timestamp': '2025-10-01 04:27:29.040628', 'step': 11790, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:27:29.077248', 'step': 11790, 'epoch': 2} {'type': 'loss', 'content': 0.14629322290420532, 'timestamp': '2025-10-01 04:27:29.079522', 'step': 11791, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:29.114815', 'step': 11791, 'epoch': 2} {'type': 'loss', 'content': 0.08239532262086868, 'timestamp': '2025-10-01 04:27:29.138530', 'step': 11792, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:29.174110', 'step': 11792, 'epoch': 2} {'type': 'loss', 'content': 0.11816620081663132, 'timestamp': '2025-10-01 04:27:29.176737', 'step': 11793, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:27:29.210400', 'step': 11793, 'epoch': 2} {'type': 'loss', 'content': 0.06410633027553558, 'timestamp': '2025-10-01 04:27:29.212606', 'step': 11794, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:29.261707', 'step': 11794, 'epoch': 2} {'type': 'loss', 'content': 0.08273293077945709, 'timestamp': '2025-10-01 04:27:29.278935', 'step': 11795, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:27:29.324224', 'step': 11795, 'epoch': 2} {'type': 'loss', 'content': 0.13599126040935516, 'timestamp': '2025-10-01 04:27:29.348056', 'step': 11796, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:27:29.388605', 'step': 11796, 'epoch': 2} {'type': 'loss', 'content': 0.08417028933763504, 'timestamp': '2025-10-01 04:27:29.390909', 'step': 11797, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:29.426277', 'step': 11797, 'epoch': 2} {'type': 'loss', 'content': 0.08359590172767639, 'timestamp': '2025-10-01 04:27:29.428523', 'step': 11798, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:29.461526', 'step': 11798, 'epoch': 2} {'type': 'loss', 'content': 0.14169557392597198, 'timestamp': '2025-10-01 04:27:29.463638', 'step': 11799, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:29.497115', 'step': 11799, 'epoch': 2} {'type': 'loss', 'content': 0.0677538514137268, 'timestamp': '2025-10-01 04:27:29.520913', 'step': 11800, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:27:29.552683', 'step': 11800, 'epoch': 2} {'type': 'loss', 'content': 0.1010906919836998, 'timestamp': '2025-10-01 04:27:29.554994', 'step': 11801, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:27:29.587659', 'step': 11801, 'epoch': 2} {'type': 'loss', 'content': 0.08906467258930206, 'timestamp': '2025-10-01 04:27:29.590215', 'step': 11802, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:29.635055', 'step': 11802, 'epoch': 2} {'type': 'loss', 'content': 0.097224660217762, 'timestamp': '2025-10-01 04:27:29.637324', 'step': 11803, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:27:29.672061', 'step': 11803, 'epoch': 2} {'type': 'loss', 'content': 0.1185239851474762, 'timestamp': '2025-10-01 04:27:29.695811', 'step': 11804, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:29.729621', 'step': 11804, 'epoch': 2} {'type': 'loss', 'content': 0.07016720622777939, 'timestamp': '2025-10-01 04:27:29.731843', 'step': 11805, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:27:29.766497', 'step': 11805, 'epoch': 2} {'type': 'loss', 'content': 0.15153145790100098, 'timestamp': '2025-10-01 04:27:29.775441', 'step': 11806, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:27:29.808897', 'step': 11806, 'epoch': 2} {'type': 'loss', 'content': 0.161098450422287, 'timestamp': '2025-10-01 04:27:29.811207', 'step': 11807, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:27:29.845227', 'step': 11807, 'epoch': 2} {'type': 'loss', 'content': 0.11705511063337326, 'timestamp': '2025-10-01 04:27:29.869026', 'step': 11808, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:27:29.902750', 'step': 11808, 'epoch': 2} {'type': 'loss', 'content': 0.09345144033432007, 'timestamp': '2025-10-01 04:27:29.904885', 'step': 11809, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:29.938799', 'step': 11809, 'epoch': 2} {'type': 'loss', 'content': 0.14191488921642303, 'timestamp': '2025-10-01 04:27:29.941048', 'step': 11810, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:27:29.973380', 'step': 11810, 'epoch': 2} {'type': 'loss', 'content': 0.09702149778604507, 'timestamp': '2025-10-01 04:27:29.975669', 'step': 11811, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:27:30.015780', 'step': 11811, 'epoch': 2} {'type': 'loss', 'content': 0.12413682788610458, 'timestamp': '2025-10-01 04:27:30.040843', 'step': 11812, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:27:30.073261', 'step': 11812, 'epoch': 2} {'type': 'loss', 'content': 0.08693195879459381, 'timestamp': '2025-10-01 04:27:30.075544', 'step': 11813, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:30.137647', 'step': 11813, 'epoch': 2} {'type': 'loss', 'content': 0.10821318626403809, 'timestamp': '2025-10-01 04:27:30.139760', 'step': 11814, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:27:30.174905', 'step': 11814, 'epoch': 2} {'type': 'loss', 'content': 0.16697674989700317, 'timestamp': '2025-10-01 04:27:30.177295', 'step': 11815, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:27:30.210957', 'step': 11815, 'epoch': 2} {'type': 'loss', 'content': 0.046617183834314346, 'timestamp': '2025-10-01 04:27:30.235743', 'step': 11816, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:30.271743', 'step': 11816, 'epoch': 2} {'type': 'loss', 'content': 0.15751145780086517, 'timestamp': '2025-10-01 04:27:30.274803', 'step': 11817, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:30.318734', 'step': 11817, 'epoch': 2} {'type': 'loss', 'content': 0.1654798686504364, 'timestamp': '2025-10-01 04:27:30.320991', 'step': 11818, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:30.369291', 'step': 11818, 'epoch': 2} {'type': 'loss', 'content': 0.1973457634449005, 'timestamp': '2025-10-01 04:27:30.371819', 'step': 11819, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:27:30.416294', 'step': 11819, 'epoch': 2} {'type': 'loss', 'content': 0.033454250544309616, 'timestamp': '2025-10-01 04:27:30.440003', 'step': 11820, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:27:30.480448', 'step': 11820, 'epoch': 2} {'type': 'loss', 'content': 0.031163685023784637, 'timestamp': '2025-10-01 04:27:30.482648', 'step': 11821, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:27:30.517566', 'step': 11821, 'epoch': 2} {'type': 'loss', 'content': 0.1640775054693222, 'timestamp': '2025-10-01 04:27:30.519755', 'step': 11822, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:27:30.562867', 'step': 11822, 'epoch': 2} {'type': 'loss', 'content': 0.04471859708428383, 'timestamp': '2025-10-01 04:27:30.565309', 'step': 11823, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:27:30.610577', 'step': 11823, 'epoch': 2} {'type': 'loss', 'content': 0.12690532207489014, 'timestamp': '2025-10-01 04:27:30.634270', 'step': 11824, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:27:30.679461', 'step': 11824, 'epoch': 2} {'type': 'loss', 'content': 0.133375346660614, 'timestamp': '2025-10-01 04:27:30.681803', 'step': 11825, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:30.714546', 'step': 11825, 'epoch': 2} {'type': 'loss', 'content': 0.12296555191278458, 'timestamp': '2025-10-01 04:27:30.716701', 'step': 11826, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:30.769380', 'step': 11826, 'epoch': 2} {'type': 'loss', 'content': 0.08801640570163727, 'timestamp': '2025-10-01 04:27:30.771564', 'step': 11827, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:27:30.803885', 'step': 11827, 'epoch': 2} {'type': 'loss', 'content': 0.17747622728347778, 'timestamp': '2025-10-01 04:27:30.827732', 'step': 11828, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:30.860252', 'step': 11828, 'epoch': 2} {'type': 'loss', 'content': 0.1002286970615387, 'timestamp': '2025-10-01 04:27:30.862509', 'step': 11829, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:27:30.894135', 'step': 11829, 'epoch': 2} {'type': 'loss', 'content': 0.24150121212005615, 'timestamp': '2025-10-01 04:27:30.896558', 'step': 11830, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:30.929756', 'step': 11830, 'epoch': 2} {'type': 'loss', 'content': 0.1515597701072693, 'timestamp': '2025-10-01 04:27:30.932587', 'step': 11831, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:27:30.974687', 'step': 11831, 'epoch': 2} {'type': 'loss', 'content': 0.19912049174308777, 'timestamp': '2025-10-01 04:27:30.998964', 'step': 11832, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:31.040212', 'step': 11832, 'epoch': 2} {'type': 'loss', 'content': 0.10144933313131332, 'timestamp': '2025-10-01 04:27:31.042528', 'step': 11833, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:27:31.075254', 'step': 11833, 'epoch': 2} {'type': 'loss', 'content': 0.20230887830257416, 'timestamp': '2025-10-01 04:27:31.077450', 'step': 11834, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:27:31.117558', 'step': 11834, 'epoch': 2} {'type': 'loss', 'content': 0.15454937517642975, 'timestamp': '2025-10-01 04:27:31.119951', 'step': 11835, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:27:31.151452', 'step': 11835, 'epoch': 2} {'type': 'loss', 'content': 0.09870560467243195, 'timestamp': '2025-10-01 04:27:31.186111', 'step': 11836, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:27:31.219196', 'step': 11836, 'epoch': 2} {'type': 'loss', 'content': 0.04494384303689003, 'timestamp': '2025-10-01 04:27:31.223798', 'step': 11837, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:31.263890', 'step': 11837, 'epoch': 2} {'type': 'loss', 'content': 0.08392956107854843, 'timestamp': '2025-10-01 04:27:31.266340', 'step': 11838, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:27:31.302785', 'step': 11838, 'epoch': 2} {'type': 'loss', 'content': 0.14880357682704926, 'timestamp': '2025-10-01 04:27:31.305198', 'step': 11839, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:27:31.340233', 'step': 11839, 'epoch': 2} {'type': 'loss', 'content': 0.08954941481351852, 'timestamp': '2025-10-01 04:27:31.364564', 'step': 11840, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:27:31.400250', 'step': 11840, 'epoch': 2} {'type': 'loss', 'content': 0.13965287804603577, 'timestamp': '2025-10-01 04:27:31.402716', 'step': 11841, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:31.466798', 'step': 11841, 'epoch': 2} {'type': 'loss', 'content': 0.1579948216676712, 'timestamp': '2025-10-01 04:27:31.469550', 'step': 11842, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:31.506403', 'step': 11842, 'epoch': 2} {'type': 'loss', 'content': 0.09901440888643265, 'timestamp': '2025-10-01 04:27:31.508763', 'step': 11843, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:31.547604', 'step': 11843, 'epoch': 2} {'type': 'loss', 'content': 0.041268471628427505, 'timestamp': '2025-10-01 04:27:31.571687', 'step': 11844, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:27:31.615335', 'step': 11844, 'epoch': 2} {'type': 'loss', 'content': 0.13178016245365143, 'timestamp': '2025-10-01 04:27:31.617923', 'step': 11845, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:31.660005', 'step': 11845, 'epoch': 2} {'type': 'loss', 'content': 0.12086395174264908, 'timestamp': '2025-10-01 04:27:31.662975', 'step': 11846, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:27:31.696790', 'step': 11846, 'epoch': 2} {'type': 'loss', 'content': 0.14487670361995697, 'timestamp': '2025-10-01 04:27:31.698995', 'step': 11847, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:27:31.744092', 'step': 11847, 'epoch': 2} {'type': 'loss', 'content': 0.027888597920536995, 'timestamp': '2025-10-01 04:27:31.768967', 'step': 11848, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:31.803277', 'step': 11848, 'epoch': 2} {'type': 'loss', 'content': 0.12474007904529572, 'timestamp': '2025-10-01 04:27:31.805913', 'step': 11849, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:27:31.840596', 'step': 11849, 'epoch': 2} {'type': 'loss', 'content': 0.055694565176963806, 'timestamp': '2025-10-01 04:27:31.843184', 'step': 11850, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:27:31.879343', 'step': 11850, 'epoch': 2} {'type': 'loss', 'content': 0.08571948111057281, 'timestamp': '2025-10-01 04:27:31.881827', 'step': 11851, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:27:31.952953', 'step': 11851, 'epoch': 2} {'type': 'loss', 'content': 0.11707496643066406, 'timestamp': '2025-10-01 04:27:31.976900', 'step': 11852, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:32.021780', 'step': 11852, 'epoch': 2} {'type': 'loss', 'content': 0.03921579197049141, 'timestamp': '2025-10-01 04:27:32.024294', 'step': 11853, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:27:32.065299', 'step': 11853, 'epoch': 2} {'type': 'loss', 'content': 0.15773184597492218, 'timestamp': '2025-10-01 04:27:32.070067', 'step': 11854, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:32.106747', 'step': 11854, 'epoch': 2} {'type': 'loss', 'content': 0.13203805685043335, 'timestamp': '2025-10-01 04:27:32.109329', 'step': 11855, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:32.144064', 'step': 11855, 'epoch': 2} {'type': 'loss', 'content': 0.18192149698734283, 'timestamp': '2025-10-01 04:27:32.168115', 'step': 11856, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:27:32.217019', 'step': 11856, 'epoch': 2} {'type': 'loss', 'content': 0.11193785816431046, 'timestamp': '2025-10-01 04:27:32.219576', 'step': 11857, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:32.262912', 'step': 11857, 'epoch': 2} {'type': 'loss', 'content': 0.11677949130535126, 'timestamp': '2025-10-01 04:27:32.267002', 'step': 11858, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:27:32.305876', 'step': 11858, 'epoch': 2} {'type': 'loss', 'content': 0.0836416706442833, 'timestamp': '2025-10-01 04:27:32.308814', 'step': 11859, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:32.347693', 'step': 11859, 'epoch': 2} {'type': 'loss', 'content': 0.15657758712768555, 'timestamp': '2025-10-01 04:27:32.372979', 'step': 11860, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:32.405868', 'step': 11860, 'epoch': 2} {'type': 'loss', 'content': 0.12583886086940765, 'timestamp': '2025-10-01 04:27:32.408292', 'step': 11861, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:27:32.446407', 'step': 11861, 'epoch': 2} {'type': 'loss', 'content': 0.19627226889133453, 'timestamp': '2025-10-01 04:27:32.449032', 'step': 11862, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:27:32.484148', 'step': 11862, 'epoch': 2} {'type': 'loss', 'content': 0.10852959752082825, 'timestamp': '2025-10-01 04:27:32.486495', 'step': 11863, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:32.523752', 'step': 11863, 'epoch': 2} {'type': 'loss', 'content': 0.08082505315542221, 'timestamp': '2025-10-01 04:27:32.553032', 'step': 11864, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:27:32.585813', 'step': 11864, 'epoch': 2} {'type': 'loss', 'content': 0.03599155694246292, 'timestamp': '2025-10-01 04:27:32.589299', 'step': 11865, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:27:32.624025', 'step': 11865, 'epoch': 2} {'type': 'loss', 'content': 0.13129670917987823, 'timestamp': '2025-10-01 04:27:32.628034', 'step': 11866, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:32.675721', 'step': 11866, 'epoch': 2} {'type': 'loss', 'content': 0.06563524901866913, 'timestamp': '2025-10-01 04:27:32.678287', 'step': 11867, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:27:32.721103', 'step': 11867, 'epoch': 2} {'type': 'loss', 'content': 0.1299879103899002, 'timestamp': '2025-10-01 04:27:32.745142', 'step': 11868, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:27:32.794271', 'step': 11868, 'epoch': 2} {'type': 'loss', 'content': 0.0700647309422493, 'timestamp': '2025-10-01 04:27:32.797113', 'step': 11869, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:27:32.839921', 'step': 11869, 'epoch': 2} {'type': 'loss', 'content': 0.09220080077648163, 'timestamp': '2025-10-01 04:27:32.842304', 'step': 11870, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:27:32.880112', 'step': 11870, 'epoch': 2} {'type': 'loss', 'content': 0.1623617261648178, 'timestamp': '2025-10-01 04:27:32.883038', 'step': 11871, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:27:32.915718', 'step': 11871, 'epoch': 2} {'type': 'loss', 'content': 0.12033575028181076, 'timestamp': '2025-10-01 04:27:32.939584', 'step': 11872, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:27:32.977203', 'step': 11872, 'epoch': 2} {'type': 'loss', 'content': 0.12558671832084656, 'timestamp': '2025-10-01 04:27:32.979635', 'step': 11873, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:33.013687', 'step': 11873, 'epoch': 2} {'type': 'loss', 'content': 0.12360294163227081, 'timestamp': '2025-10-01 04:27:33.016075', 'step': 11874, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:27:33.064245', 'step': 11874, 'epoch': 2} {'type': 'loss', 'content': 0.030417434871196747, 'timestamp': '2025-10-01 04:27:33.068214', 'step': 11875, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:33.115064', 'step': 11875, 'epoch': 2} {'type': 'loss', 'content': 0.0982288122177124, 'timestamp': '2025-10-01 04:27:33.138842', 'step': 11876, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:33.173345', 'step': 11876, 'epoch': 2} {'type': 'loss', 'content': 0.08770623803138733, 'timestamp': '2025-10-01 04:27:33.175723', 'step': 11877, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:33.209411', 'step': 11877, 'epoch': 2} {'type': 'loss', 'content': 0.137311652302742, 'timestamp': '2025-10-01 04:27:33.211663', 'step': 11878, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:27:33.256382', 'step': 11878, 'epoch': 2} {'type': 'loss', 'content': 0.11678363382816315, 'timestamp': '2025-10-01 04:27:33.258771', 'step': 11879, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:33.297255', 'step': 11879, 'epoch': 2} {'type': 'loss', 'content': 0.14308838546276093, 'timestamp': '2025-10-01 04:27:33.320959', 'step': 11880, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:33.356417', 'step': 11880, 'epoch': 2} {'type': 'loss', 'content': 0.14435967803001404, 'timestamp': '2025-10-01 04:27:33.358781', 'step': 11881, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:27:33.404443', 'step': 11881, 'epoch': 2} {'type': 'loss', 'content': 0.1083003357052803, 'timestamp': '2025-10-01 04:27:33.406997', 'step': 11882, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:33.440994', 'step': 11882, 'epoch': 2} {'type': 'loss', 'content': 0.16267377138137817, 'timestamp': '2025-10-01 04:27:33.443355', 'step': 11883, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:27:33.480181', 'step': 11883, 'epoch': 2} {'type': 'loss', 'content': 0.10718007385730743, 'timestamp': '2025-10-01 04:27:33.504180', 'step': 11884, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:27:33.545456', 'step': 11884, 'epoch': 2} {'type': 'loss', 'content': 0.09575417637825012, 'timestamp': '2025-10-01 04:27:33.547805', 'step': 11885, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:33.587586', 'step': 11885, 'epoch': 2} {'type': 'loss', 'content': 0.15667760372161865, 'timestamp': '2025-10-01 04:27:33.590162', 'step': 11886, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:27:33.625227', 'step': 11886, 'epoch': 2} {'type': 'loss', 'content': 0.08073527365922928, 'timestamp': '2025-10-01 04:27:33.627752', 'step': 11887, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:33.662670', 'step': 11887, 'epoch': 2} {'type': 'loss', 'content': 0.055782802402973175, 'timestamp': '2025-10-01 04:27:33.686655', 'step': 11888, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:27:33.733273', 'step': 11888, 'epoch': 2} {'type': 'loss', 'content': 0.1610439568758011, 'timestamp': '2025-10-01 04:27:33.735754', 'step': 11889, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:27:33.780555', 'step': 11889, 'epoch': 2} {'type': 'loss', 'content': 0.12815578281879425, 'timestamp': '2025-10-01 04:27:33.782822', 'step': 11890, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:27:33.817693', 'step': 11890, 'epoch': 2} {'type': 'loss', 'content': 0.10330260545015335, 'timestamp': '2025-10-01 04:27:33.819910', 'step': 11891, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:27:33.853336', 'step': 11891, 'epoch': 2} {'type': 'loss', 'content': 0.14794902503490448, 'timestamp': '2025-10-01 04:27:33.877219', 'step': 11892, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:33.920668', 'step': 11892, 'epoch': 2} {'type': 'loss', 'content': 0.09260562807321548, 'timestamp': '2025-10-01 04:27:33.922894', 'step': 11893, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:27:33.956006', 'step': 11893, 'epoch': 2} {'type': 'loss', 'content': 0.10971124470233917, 'timestamp': '2025-10-01 04:27:33.958699', 'step': 11894, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:34.004169', 'step': 11894, 'epoch': 2} {'type': 'loss', 'content': 0.2241222858428955, 'timestamp': '2025-10-01 04:27:34.008174', 'step': 11895, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:27:34.044357', 'step': 11895, 'epoch': 2} {'type': 'loss', 'content': 0.045512039214372635, 'timestamp': '2025-10-01 04:27:34.068152', 'step': 11896, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:27:34.115069', 'step': 11896, 'epoch': 2} {'type': 'loss', 'content': 0.14357349276542664, 'timestamp': '2025-10-01 04:27:34.117325', 'step': 11897, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:27:34.163693', 'step': 11897, 'epoch': 2} {'type': 'loss', 'content': 0.08994931727647781, 'timestamp': '2025-10-01 04:27:34.166278', 'step': 11898, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:27:34.211399', 'step': 11898, 'epoch': 2} {'type': 'loss', 'content': 0.10146253556013107, 'timestamp': '2025-10-01 04:27:34.213930', 'step': 11899, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:34.250058', 'step': 11899, 'epoch': 2} {'type': 'loss', 'content': 0.09123535454273224, 'timestamp': '2025-10-01 04:27:34.273806', 'step': 11900, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:27:34.321199', 'step': 11900, 'epoch': 2} {'type': 'loss', 'content': 0.19782468676567078, 'timestamp': '2025-10-01 04:27:34.323582', 'step': 11901, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:34.357887', 'step': 11901, 'epoch': 2} {'type': 'loss', 'content': 0.12080633640289307, 'timestamp': '2025-10-01 04:27:34.360710', 'step': 11902, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:34.395220', 'step': 11902, 'epoch': 2} {'type': 'loss', 'content': 0.12692610919475555, 'timestamp': '2025-10-01 04:27:34.397765', 'step': 11903, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:34.437620', 'step': 11903, 'epoch': 2} {'type': 'loss', 'content': 0.16328106820583344, 'timestamp': '2025-10-01 04:27:34.461259', 'step': 11904, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:27:34.498747', 'step': 11904, 'epoch': 2} {'type': 'loss', 'content': 0.13554497063159943, 'timestamp': '2025-10-01 04:27:34.501181', 'step': 11905, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:34.539429', 'step': 11905, 'epoch': 2} {'type': 'loss', 'content': 0.07288061082363129, 'timestamp': '2025-10-01 04:27:34.542054', 'step': 11906, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:34.583201', 'step': 11906, 'epoch': 2} {'type': 'loss', 'content': 0.061038706451654434, 'timestamp': '2025-10-01 04:27:34.585765', 'step': 11907, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:27:34.619743', 'step': 11907, 'epoch': 2} {'type': 'loss', 'content': 0.12626858055591583, 'timestamp': '2025-10-01 04:27:34.643391', 'step': 11908, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:27:34.676684', 'step': 11908, 'epoch': 2} {'type': 'loss', 'content': 0.05061771720647812, 'timestamp': '2025-10-01 04:27:34.678910', 'step': 11909, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:27:34.712155', 'step': 11909, 'epoch': 2} {'type': 'loss', 'content': 0.21041829884052277, 'timestamp': '2025-10-01 04:27:34.714358', 'step': 11910, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:27:34.749762', 'step': 11910, 'epoch': 2} {'type': 'loss', 'content': 0.15309548377990723, 'timestamp': '2025-10-01 04:27:34.752541', 'step': 11911, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:27:34.789666', 'step': 11911, 'epoch': 2} {'type': 'loss', 'content': 0.08195841312408447, 'timestamp': '2025-10-01 04:27:34.813505', 'step': 11912, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:34.856963', 'step': 11912, 'epoch': 2} {'type': 'loss', 'content': 0.0931081473827362, 'timestamp': '2025-10-01 04:27:34.859272', 'step': 11913, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:34.897777', 'step': 11913, 'epoch': 2} {'type': 'loss', 'content': 0.1371220201253891, 'timestamp': '2025-10-01 04:27:34.900539', 'step': 11914, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:27:34.935440', 'step': 11914, 'epoch': 2} {'type': 'loss', 'content': 0.19790783524513245, 'timestamp': '2025-10-01 04:27:34.938290', 'step': 11915, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:34.983185', 'step': 11915, 'epoch': 2} {'type': 'loss', 'content': 0.0799105241894722, 'timestamp': '2025-10-01 04:27:35.006816', 'step': 11916, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:35.042398', 'step': 11916, 'epoch': 2} {'type': 'loss', 'content': 0.07184606790542603, 'timestamp': '2025-10-01 04:27:35.044714', 'step': 11917, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:27:35.090798', 'step': 11917, 'epoch': 2} {'type': 'loss', 'content': 0.08095603436231613, 'timestamp': '2025-10-01 04:27:35.093298', 'step': 11918, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:27:35.139911', 'step': 11918, 'epoch': 2} {'type': 'loss', 'content': 0.10371211916208267, 'timestamp': '2025-10-01 04:27:35.142077', 'step': 11919, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:35.177472', 'step': 11919, 'epoch': 2} {'type': 'loss', 'content': 0.11810958385467529, 'timestamp': '2025-10-01 04:27:35.201262', 'step': 11920, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:35.240442', 'step': 11920, 'epoch': 2} {'type': 'loss', 'content': 0.11296993494033813, 'timestamp': '2025-10-01 04:27:35.243590', 'step': 11921, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:35.290213', 'step': 11921, 'epoch': 2} {'type': 'loss', 'content': 0.07584276795387268, 'timestamp': '2025-10-01 04:27:35.292347', 'step': 11922, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:35.336076', 'step': 11922, 'epoch': 2} {'type': 'loss', 'content': 0.1610690802335739, 'timestamp': '2025-10-01 04:27:35.338280', 'step': 11923, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:35.369938', 'step': 11923, 'epoch': 2} {'type': 'loss', 'content': 0.1412065029144287, 'timestamp': '2025-10-01 04:27:35.393548', 'step': 11924, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:35.427795', 'step': 11924, 'epoch': 2} {'type': 'loss', 'content': 0.056548502296209335, 'timestamp': '2025-10-01 04:27:35.430063', 'step': 11925, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:35.474190', 'step': 11925, 'epoch': 2} {'type': 'loss', 'content': 0.12941548228263855, 'timestamp': '2025-10-01 04:27:35.476386', 'step': 11926, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:27:35.511613', 'step': 11926, 'epoch': 2} {'type': 'loss', 'content': 0.12580454349517822, 'timestamp': '2025-10-01 04:27:35.513749', 'step': 11927, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-10-01 04:27:35.550769', 'step': 11927, 'epoch': 2} {'type': 'loss', 'content': 0.09770136326551437, 'timestamp': '2025-10-01 04:27:35.578779', 'step': 11928, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:27:35.613393', 'step': 11928, 'epoch': 2} {'type': 'loss', 'content': 0.03531176224350929, 'timestamp': '2025-10-01 04:27:35.615551', 'step': 11929, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:27:35.664107', 'step': 11929, 'epoch': 2} {'type': 'loss', 'content': 0.08816488832235336, 'timestamp': '2025-10-01 04:27:35.668538', 'step': 11930, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:27:35.702485', 'step': 11930, 'epoch': 2} {'type': 'loss', 'content': 0.162843719124794, 'timestamp': '2025-10-01 04:27:35.706019', 'step': 11931, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:27:35.739528', 'step': 11931, 'epoch': 2} {'type': 'loss', 'content': 0.09680682420730591, 'timestamp': '2025-10-01 04:27:35.763632', 'step': 11932, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:35.797594', 'step': 11932, 'epoch': 2} {'type': 'loss', 'content': 0.2031017243862152, 'timestamp': '2025-10-01 04:27:35.800107', 'step': 11933, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:27:35.835040', 'step': 11933, 'epoch': 2} {'type': 'loss', 'content': 0.06929480284452438, 'timestamp': '2025-10-01 04:27:35.837245', 'step': 11934, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:27:35.874436', 'step': 11934, 'epoch': 2} {'type': 'loss', 'content': 0.14661161601543427, 'timestamp': '2025-10-01 04:27:35.876654', 'step': 11935, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:27:35.911577', 'step': 11935, 'epoch': 2} {'type': 'loss', 'content': 0.07074622809886932, 'timestamp': '2025-10-01 04:27:35.935363', 'step': 11936, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:35.975837', 'step': 11936, 'epoch': 2} {'type': 'loss', 'content': 0.08415619283914566, 'timestamp': '2025-10-01 04:27:35.978399', 'step': 11937, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:36.012622', 'step': 11937, 'epoch': 2} {'type': 'loss', 'content': 0.06574970483779907, 'timestamp': '2025-10-01 04:27:36.014771', 'step': 11938, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:27:36.049436', 'step': 11938, 'epoch': 2} {'type': 'loss', 'content': 0.06170574948191643, 'timestamp': '2025-10-01 04:27:36.051897', 'step': 11939, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:27:36.084365', 'step': 11939, 'epoch': 2} {'type': 'loss', 'content': 0.12737073004245758, 'timestamp': '2025-10-01 04:27:36.108466', 'step': 11940, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:36.152420', 'step': 11940, 'epoch': 2} {'type': 'loss', 'content': 0.14816917479038239, 'timestamp': '2025-10-01 04:27:36.154668', 'step': 11941, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:36.194068', 'step': 11941, 'epoch': 2} {'type': 'loss', 'content': 0.06837307661771774, 'timestamp': '2025-10-01 04:27:36.196240', 'step': 11942, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:36.253364', 'step': 11942, 'epoch': 2} {'type': 'loss', 'content': 0.06584622710943222, 'timestamp': '2025-10-01 04:27:36.255576', 'step': 11943, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:36.292304', 'step': 11943, 'epoch': 2} {'type': 'loss', 'content': 0.1304917186498642, 'timestamp': '2025-10-01 04:27:36.316026', 'step': 11944, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:27:36.350689', 'step': 11944, 'epoch': 2} {'type': 'loss', 'content': 0.029264993965625763, 'timestamp': '2025-10-01 04:27:36.352938', 'step': 11945, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:27:36.386937', 'step': 11945, 'epoch': 2} {'type': 'loss', 'content': 0.1470312625169754, 'timestamp': '2025-10-01 04:27:36.389570', 'step': 11946, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:27:36.430698', 'step': 11946, 'epoch': 2} {'type': 'loss', 'content': 0.12572398781776428, 'timestamp': '2025-10-01 04:27:36.432977', 'step': 11947, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:27:36.467814', 'step': 11947, 'epoch': 2} {'type': 'loss', 'content': 0.06928739696741104, 'timestamp': '2025-10-01 04:27:36.491686', 'step': 11948, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:27:36.539217', 'step': 11948, 'epoch': 2} {'type': 'loss', 'content': 0.07349404692649841, 'timestamp': '2025-10-01 04:27:36.541400', 'step': 11949, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:27:36.582058', 'step': 11949, 'epoch': 2} {'type': 'loss', 'content': 0.08254605531692505, 'timestamp': '2025-10-01 04:27:36.584627', 'step': 11950, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:27:36.619017', 'step': 11950, 'epoch': 2} {'type': 'loss', 'content': 0.14004692435264587, 'timestamp': '2025-10-01 04:27:36.621173', 'step': 11951, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:27:36.656668', 'step': 11951, 'epoch': 2} {'type': 'loss', 'content': 0.0622502937912941, 'timestamp': '2025-10-01 04:27:36.680531', 'step': 11952, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:36.734249', 'step': 11952, 'epoch': 2} {'type': 'loss', 'content': 0.10467216372489929, 'timestamp': '2025-10-01 04:27:36.736372', 'step': 11953, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:27:36.785403', 'step': 11953, 'epoch': 2} {'type': 'loss', 'content': 0.07253723591566086, 'timestamp': '2025-10-01 04:27:36.787692', 'step': 11954, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:36.826326', 'step': 11954, 'epoch': 2} {'type': 'loss', 'content': 0.07062643021345139, 'timestamp': '2025-10-01 04:27:36.828454', 'step': 11955, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:27:36.864059', 'step': 11955, 'epoch': 2} {'type': 'loss', 'content': 0.091606505215168, 'timestamp': '2025-10-01 04:27:36.887798', 'step': 11956, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:27:36.924104', 'step': 11956, 'epoch': 2} {'type': 'loss', 'content': 0.18689601123332977, 'timestamp': '2025-10-01 04:27:36.926309', 'step': 11957, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:27:36.960593', 'step': 11957, 'epoch': 2} {'type': 'loss', 'content': 0.13873226940631866, 'timestamp': '2025-10-01 04:27:36.962884', 'step': 11958, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:37.005660', 'step': 11958, 'epoch': 2} {'type': 'loss', 'content': 0.16229462623596191, 'timestamp': '2025-10-01 04:27:37.007975', 'step': 11959, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:27:37.052112', 'step': 11959, 'epoch': 2} {'type': 'loss', 'content': 0.05651850998401642, 'timestamp': '2025-10-01 04:27:37.076058', 'step': 11960, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:27:37.112679', 'step': 11960, 'epoch': 2} {'type': 'loss', 'content': 0.14045609533786774, 'timestamp': '2025-10-01 04:27:37.114937', 'step': 11961, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:27:37.157577', 'step': 11961, 'epoch': 2} {'type': 'loss', 'content': 0.07043226063251495, 'timestamp': '2025-10-01 04:27:37.159855', 'step': 11962, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:37.193565', 'step': 11962, 'epoch': 2} {'type': 'loss', 'content': 0.19260327517986298, 'timestamp': '2025-10-01 04:27:37.195885', 'step': 11963, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:27:37.229681', 'step': 11963, 'epoch': 2} {'type': 'loss', 'content': 0.07734126597642899, 'timestamp': '2025-10-01 04:27:37.253392', 'step': 11964, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:37.288434', 'step': 11964, 'epoch': 2} {'type': 'loss', 'content': 0.13249599933624268, 'timestamp': '2025-10-01 04:27:37.290710', 'step': 11965, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:37.323972', 'step': 11965, 'epoch': 2} {'type': 'loss', 'content': 0.09897999465465546, 'timestamp': '2025-10-01 04:27:37.326219', 'step': 11966, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:37.359469', 'step': 11966, 'epoch': 2} {'type': 'loss', 'content': 0.16881121695041656, 'timestamp': '2025-10-01 04:27:37.375085', 'step': 11967, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:37.408652', 'step': 11967, 'epoch': 2} {'type': 'loss', 'content': 0.14366650581359863, 'timestamp': '2025-10-01 04:27:37.433827', 'step': 11968, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:27:37.474406', 'step': 11968, 'epoch': 2} {'type': 'loss', 'content': 0.08461577445268631, 'timestamp': '2025-10-01 04:27:37.476543', 'step': 11969, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:27:37.519589', 'step': 11969, 'epoch': 2} {'type': 'loss', 'content': 0.13986745476722717, 'timestamp': '2025-10-01 04:27:37.521686', 'step': 11970, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:27:37.555685', 'step': 11970, 'epoch': 2} {'type': 'loss', 'content': 0.10918166488409042, 'timestamp': '2025-10-01 04:27:37.558024', 'step': 11971, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:27:37.592043', 'step': 11971, 'epoch': 2} {'type': 'loss', 'content': 0.1413308084011078, 'timestamp': '2025-10-01 04:27:37.615892', 'step': 11972, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:37.668281', 'step': 11972, 'epoch': 2} {'type': 'loss', 'content': 0.13109324872493744, 'timestamp': '2025-10-01 04:27:37.670400', 'step': 11973, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:27:37.708128', 'step': 11973, 'epoch': 2} {'type': 'loss', 'content': 0.14576837420463562, 'timestamp': '2025-10-01 04:27:37.710412', 'step': 11974, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:27:37.750731', 'step': 11974, 'epoch': 2} {'type': 'loss', 'content': 0.1283218413591385, 'timestamp': '2025-10-01 04:27:37.753221', 'step': 11975, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:27:37.803572', 'step': 11975, 'epoch': 2} {'type': 'loss', 'content': 0.09491743892431259, 'timestamp': '2025-10-01 04:27:37.827415', 'step': 11976, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:37.863521', 'step': 11976, 'epoch': 2} {'type': 'loss', 'content': 0.08839444071054459, 'timestamp': '2025-10-01 04:27:37.865689', 'step': 11977, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:27:37.898695', 'step': 11977, 'epoch': 2} {'type': 'loss', 'content': 0.12424613535404205, 'timestamp': '2025-10-01 04:27:37.900790', 'step': 11978, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:27:37.942844', 'step': 11978, 'epoch': 2} {'type': 'loss', 'content': 0.13560454547405243, 'timestamp': '2025-10-01 04:27:37.945471', 'step': 11979, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:27:37.981101', 'step': 11979, 'epoch': 2} {'type': 'loss', 'content': 0.12769557535648346, 'timestamp': '2025-10-01 04:27:38.005181', 'step': 11980, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:27:38.044531', 'step': 11980, 'epoch': 2} {'type': 'loss', 'content': 0.11479908972978592, 'timestamp': '2025-10-01 04:27:38.046751', 'step': 11981, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:38.096343', 'step': 11981, 'epoch': 2} {'type': 'loss', 'content': 0.16416364908218384, 'timestamp': '2025-10-01 04:27:38.098783', 'step': 11982, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:38.132322', 'step': 11982, 'epoch': 2} {'type': 'loss', 'content': 0.08764931559562683, 'timestamp': '2025-10-01 04:27:38.134601', 'step': 11983, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:38.167143', 'step': 11983, 'epoch': 2} {'type': 'loss', 'content': 0.09489434957504272, 'timestamp': '2025-10-01 04:27:38.190758', 'step': 11984, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:27:38.228701', 'step': 11984, 'epoch': 2} {'type': 'loss', 'content': 0.06076088547706604, 'timestamp': '2025-10-01 04:27:38.236829', 'step': 11985, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:27:38.271511', 'step': 11985, 'epoch': 2} {'type': 'loss', 'content': 0.09749632328748703, 'timestamp': '2025-10-01 04:27:38.273740', 'step': 11986, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:27:38.308671', 'step': 11986, 'epoch': 2} {'type': 'loss', 'content': 0.11458900570869446, 'timestamp': '2025-10-01 04:27:38.311468', 'step': 11987, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:27:38.346840', 'step': 11987, 'epoch': 2} {'type': 'loss', 'content': 0.050741225481033325, 'timestamp': '2025-10-01 04:27:38.370673', 'step': 11988, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:27:38.414800', 'step': 11988, 'epoch': 2} {'type': 'loss', 'content': 0.15288402140140533, 'timestamp': '2025-10-01 04:27:38.417056', 'step': 11989, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:27:38.462348', 'step': 11989, 'epoch': 2} {'type': 'loss', 'content': 0.10055143386125565, 'timestamp': '2025-10-01 04:27:38.469462', 'step': 11990, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:27:38.503365', 'step': 11990, 'epoch': 2} {'type': 'loss', 'content': 0.0937386155128479, 'timestamp': '2025-10-01 04:27:38.505894', 'step': 11991, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:27:38.550069', 'step': 11991, 'epoch': 2} {'type': 'loss', 'content': 0.11394360661506653, 'timestamp': '2025-10-01 04:27:38.573796', 'step': 11992, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:27:38.609433', 'step': 11992, 'epoch': 2} {'type': 'loss', 'content': 0.04779382422566414, 'timestamp': '2025-10-01 04:27:38.611702', 'step': 11993, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:27:38.648839', 'step': 11993, 'epoch': 2} {'type': 'loss', 'content': 0.23697148263454437, 'timestamp': '2025-10-01 04:27:38.650786', 'step': 11994, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:38.688768', 'step': 11994, 'epoch': 2} {'type': 'loss', 'content': 0.10675279051065445, 'timestamp': '2025-10-01 04:27:38.690963', 'step': 11995, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:27:38.741378', 'step': 11995, 'epoch': 2} {'type': 'loss', 'content': 0.11326568573713303, 'timestamp': '2025-10-01 04:27:38.765162', 'step': 11996, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:38.800202', 'step': 11996, 'epoch': 2} {'type': 'loss', 'content': 0.10595118999481201, 'timestamp': '2025-10-01 04:27:38.803307', 'step': 11997, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:27:38.856118', 'step': 11997, 'epoch': 2} {'type': 'loss', 'content': 0.18726618587970734, 'timestamp': '2025-10-01 04:27:38.868849', 'step': 11998, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:38.904392', 'step': 11998, 'epoch': 2} {'type': 'loss', 'content': 0.07936592400074005, 'timestamp': '2025-10-01 04:27:38.906896', 'step': 11999, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:27:38.956776', 'step': 11999, 'epoch': 2} {'type': 'loss', 'content': 0.09237229824066162, 'timestamp': '2025-10-01 04:27:38.980793', 'step': 12000, 'epoch': 2} {'type': 'info', 'content': 'Checkpoint saved at step 12000', 'timestamp': '2025-10-01 04:27:45.025528', 'step': 12000, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:27:45.069146', 'step': 12000, 'epoch': 2} {'type': 'loss', 'content': 0.13501577079296112, 'timestamp': '2025-10-01 04:27:45.088473', 'step': 12001, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:27:45.137206', 'step': 12001, 'epoch': 2} {'type': 'loss', 'content': 0.06149223446846008, 'timestamp': '2025-10-01 04:27:45.155092', 'step': 12002, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:45.194422', 'step': 12002, 'epoch': 2} {'type': 'loss', 'content': 0.1700233370065689, 'timestamp': '2025-10-01 04:27:45.208151', 'step': 12003, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:45.271548', 'step': 12003, 'epoch': 2} {'type': 'loss', 'content': 0.036244429647922516, 'timestamp': '2025-10-01 04:27:45.299049', 'step': 12004, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:45.348240', 'step': 12004, 'epoch': 2} {'type': 'loss', 'content': 0.17733272910118103, 'timestamp': '2025-10-01 04:27:45.351038', 'step': 12005, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:45.401519', 'step': 12005, 'epoch': 2} {'type': 'loss', 'content': 0.058187805116176605, 'timestamp': '2025-10-01 04:27:45.417859', 'step': 12006, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:45.462007', 'step': 12006, 'epoch': 2} {'type': 'loss', 'content': 0.10143407434225082, 'timestamp': '2025-10-01 04:27:45.464632', 'step': 12007, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:45.507305', 'step': 12007, 'epoch': 2} {'type': 'loss', 'content': 0.13249514997005463, 'timestamp': '2025-10-01 04:27:45.537106', 'step': 12008, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:27:45.591006', 'step': 12008, 'epoch': 2} {'type': 'loss', 'content': 0.08904236555099487, 'timestamp': '2025-10-01 04:27:45.593228', 'step': 12009, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:27:45.626685', 'step': 12009, 'epoch': 2} {'type': 'loss', 'content': 0.14500845968723297, 'timestamp': '2025-10-01 04:27:45.630181', 'step': 12010, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:27:45.663415', 'step': 12010, 'epoch': 2} {'type': 'loss', 'content': 0.04394286125898361, 'timestamp': '2025-10-01 04:27:45.665710', 'step': 12011, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:27:45.699493', 'step': 12011, 'epoch': 2} {'type': 'loss', 'content': 0.0777759701013565, 'timestamp': '2025-10-01 04:27:45.724511', 'step': 12012, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:27:45.777032', 'step': 12012, 'epoch': 2} {'type': 'loss', 'content': 0.10368955135345459, 'timestamp': '2025-10-01 04:27:45.779622', 'step': 12013, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:27:45.814843', 'step': 12013, 'epoch': 2} {'type': 'loss', 'content': 0.10869148373603821, 'timestamp': '2025-10-01 04:27:45.817025', 'step': 12014, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:45.850591', 'step': 12014, 'epoch': 2} {'type': 'loss', 'content': 0.12246016412973404, 'timestamp': '2025-10-01 04:27:45.853281', 'step': 12015, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:27:45.913178', 'step': 12015, 'epoch': 2} {'type': 'loss', 'content': 0.0737982839345932, 'timestamp': '2025-10-01 04:27:45.937269', 'step': 12016, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:45.971877', 'step': 12016, 'epoch': 2} {'type': 'loss', 'content': 0.1162382960319519, 'timestamp': '2025-10-01 04:27:45.974696', 'step': 12017, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:27:46.006982', 'step': 12017, 'epoch': 2} {'type': 'loss', 'content': 0.07526227086782455, 'timestamp': '2025-10-01 04:27:46.024732', 'step': 12018, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:27:46.068076', 'step': 12018, 'epoch': 2} {'type': 'loss', 'content': 0.050340522080659866, 'timestamp': '2025-10-01 04:27:46.070754', 'step': 12019, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:27:46.114258', 'step': 12019, 'epoch': 2} {'type': 'loss', 'content': 0.09533291310071945, 'timestamp': '2025-10-01 04:27:46.137838', 'step': 12020, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:46.180325', 'step': 12020, 'epoch': 2} {'type': 'loss', 'content': 0.08628898113965988, 'timestamp': '2025-10-01 04:27:46.182641', 'step': 12021, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:46.217467', 'step': 12021, 'epoch': 2} {'type': 'loss', 'content': 0.0716782733798027, 'timestamp': '2025-10-01 04:27:46.219705', 'step': 12022, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:46.259237', 'step': 12022, 'epoch': 2} {'type': 'loss', 'content': 0.10370896756649017, 'timestamp': '2025-10-01 04:27:46.261481', 'step': 12023, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:27:46.303207', 'step': 12023, 'epoch': 2} {'type': 'loss', 'content': 0.15273617208003998, 'timestamp': '2025-10-01 04:27:46.328034', 'step': 12024, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:27:46.373930', 'step': 12024, 'epoch': 2} {'type': 'loss', 'content': 0.10181412845849991, 'timestamp': '2025-10-01 04:27:46.376170', 'step': 12025, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:46.415994', 'step': 12025, 'epoch': 2} {'type': 'loss', 'content': 0.06153061240911484, 'timestamp': '2025-10-01 04:27:46.418222', 'step': 12026, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:27:46.452600', 'step': 12026, 'epoch': 2} {'type': 'loss', 'content': 0.13364523649215698, 'timestamp': '2025-10-01 04:27:46.455082', 'step': 12027, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:27:46.499941', 'step': 12027, 'epoch': 2} {'type': 'loss', 'content': 0.0887620821595192, 'timestamp': '2025-10-01 04:27:46.523731', 'step': 12028, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:27:46.556337', 'step': 12028, 'epoch': 2} {'type': 'loss', 'content': 0.1167939230799675, 'timestamp': '2025-10-01 04:27:46.558568', 'step': 12029, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:46.591310', 'step': 12029, 'epoch': 2} {'type': 'loss', 'content': 0.1827353984117508, 'timestamp': '2025-10-01 04:27:46.593807', 'step': 12030, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:27:46.639883', 'step': 12030, 'epoch': 2} {'type': 'loss', 'content': 0.061732325702905655, 'timestamp': '2025-10-01 04:27:46.653231', 'step': 12031, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:46.686313', 'step': 12031, 'epoch': 2} {'type': 'loss', 'content': 0.09518862515687943, 'timestamp': '2025-10-01 04:27:46.710227', 'step': 12032, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:27:46.743756', 'step': 12032, 'epoch': 2} {'type': 'loss', 'content': 0.12364945560693741, 'timestamp': '2025-10-01 04:27:46.746267', 'step': 12033, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:27:46.780792', 'step': 12033, 'epoch': 2} {'type': 'loss', 'content': 0.0692872554063797, 'timestamp': '2025-10-01 04:27:46.801054', 'step': 12034, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:27:46.835674', 'step': 12034, 'epoch': 2} {'type': 'loss', 'content': 0.11177079379558563, 'timestamp': '2025-10-01 04:27:46.837975', 'step': 12035, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:27:46.886439', 'step': 12035, 'epoch': 2} {'type': 'loss', 'content': 0.06402859091758728, 'timestamp': '2025-10-01 04:27:46.911081', 'step': 12036, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:27:46.992344', 'step': 12036, 'epoch': 2} {'type': 'loss', 'content': 0.10864671319723129, 'timestamp': '2025-10-01 04:27:47.008694', 'step': 12037, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:47.044021', 'step': 12037, 'epoch': 2} {'type': 'loss', 'content': 0.08403010666370392, 'timestamp': '2025-10-01 04:27:47.046130', 'step': 12038, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:27:47.096003', 'step': 12038, 'epoch': 2} {'type': 'loss', 'content': 0.0393449142575264, 'timestamp': '2025-10-01 04:27:47.098663', 'step': 12039, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:27:47.137358', 'step': 12039, 'epoch': 2} {'type': 'loss', 'content': 0.06885388493537903, 'timestamp': '2025-10-01 04:27:47.161088', 'step': 12040, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:47.194377', 'step': 12040, 'epoch': 2} {'type': 'loss', 'content': 0.05686478689312935, 'timestamp': '2025-10-01 04:27:47.196418', 'step': 12041, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:27:47.249073', 'step': 12041, 'epoch': 2} {'type': 'loss', 'content': 0.1189589649438858, 'timestamp': '2025-10-01 04:27:47.251579', 'step': 12042, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:27:47.287597', 'step': 12042, 'epoch': 2} {'type': 'loss', 'content': 0.13618935644626617, 'timestamp': '2025-10-01 04:27:47.290423', 'step': 12043, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:27:47.333477', 'step': 12043, 'epoch': 2} {'type': 'loss', 'content': 0.03533325344324112, 'timestamp': '2025-10-01 04:27:47.357208', 'step': 12044, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:27:47.400218', 'step': 12044, 'epoch': 2} {'type': 'loss', 'content': 0.14668333530426025, 'timestamp': '2025-10-01 04:27:47.402510', 'step': 12045, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:27:47.442192', 'step': 12045, 'epoch': 2} {'type': 'loss', 'content': 0.07301406562328339, 'timestamp': '2025-10-01 04:27:47.444802', 'step': 12046, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:27:47.479744', 'step': 12046, 'epoch': 2} {'type': 'loss', 'content': 0.08659247308969498, 'timestamp': '2025-10-01 04:27:47.482317', 'step': 12047, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:47.523662', 'step': 12047, 'epoch': 2} {'type': 'loss', 'content': 0.11060336977243423, 'timestamp': '2025-10-01 04:27:47.547388', 'step': 12048, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:27:47.581287', 'step': 12048, 'epoch': 2} {'type': 'loss', 'content': 0.1072772815823555, 'timestamp': '2025-10-01 04:27:47.583613', 'step': 12049, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:27:47.627192', 'step': 12049, 'epoch': 2} {'type': 'loss', 'content': 0.0745682567358017, 'timestamp': '2025-10-01 04:27:47.629443', 'step': 12050, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:27:47.663541', 'step': 12050, 'epoch': 2} {'type': 'loss', 'content': 0.049728669226169586, 'timestamp': '2025-10-01 04:27:47.665803', 'step': 12051, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:27:47.702066', 'step': 12051, 'epoch': 2} {'type': 'loss', 'content': 0.07510905712842941, 'timestamp': '2025-10-01 04:27:47.727552', 'step': 12052, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:47.776427', 'step': 12052, 'epoch': 2} {'type': 'loss', 'content': 0.05839499458670616, 'timestamp': '2025-10-01 04:27:47.778549', 'step': 12053, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:27:47.817360', 'step': 12053, 'epoch': 2} {'type': 'loss', 'content': 0.17565935850143433, 'timestamp': '2025-10-01 04:27:47.819496', 'step': 12054, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:27:47.852385', 'step': 12054, 'epoch': 2} {'type': 'loss', 'content': 0.12216975539922714, 'timestamp': '2025-10-01 04:27:47.854630', 'step': 12055, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:27:47.886495', 'step': 12055, 'epoch': 2} {'type': 'loss', 'content': 0.08467865735292435, 'timestamp': '2025-10-01 04:27:47.910191', 'step': 12056, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:27:47.942124', 'step': 12056, 'epoch': 2} {'type': 'loss', 'content': 0.0999985784292221, 'timestamp': '2025-10-01 04:27:47.958940', 'step': 12057, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:27:48.002632', 'step': 12057, 'epoch': 2} {'type': 'loss', 'content': 0.06527546793222427, 'timestamp': '2025-10-01 04:27:48.005251', 'step': 12058, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:27:48.041602', 'step': 12058, 'epoch': 2} {'type': 'loss', 'content': 0.12011467665433884, 'timestamp': '2025-10-01 04:27:48.044209', 'step': 12059, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:27:48.077782', 'step': 12059, 'epoch': 2} {'type': 'loss', 'content': 0.16456854343414307, 'timestamp': '2025-10-01 04:27:48.101864', 'step': 12060, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:48.134115', 'step': 12060, 'epoch': 2} {'type': 'loss', 'content': 0.1351112276315689, 'timestamp': '2025-10-01 04:27:48.136640', 'step': 12061, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:27:48.176587', 'step': 12061, 'epoch': 2} {'type': 'loss', 'content': 0.05776175484061241, 'timestamp': '2025-10-01 04:27:48.178926', 'step': 12062, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:27:48.213252', 'step': 12062, 'epoch': 2} {'type': 'loss', 'content': 0.13072079420089722, 'timestamp': '2025-10-01 04:27:48.215954', 'step': 12063, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:27:48.248341', 'step': 12063, 'epoch': 2} {'type': 'loss', 'content': 0.12078466266393661, 'timestamp': '2025-10-01 04:27:48.271992', 'step': 12064, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:48.307221', 'step': 12064, 'epoch': 2} {'type': 'loss', 'content': 0.09892716258764267, 'timestamp': '2025-10-01 04:27:48.318156', 'step': 12065, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:48.354159', 'step': 12065, 'epoch': 2} {'type': 'loss', 'content': 0.10229241102933884, 'timestamp': '2025-10-01 04:27:48.356330', 'step': 12066, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:27:48.392087', 'step': 12066, 'epoch': 2} {'type': 'loss', 'content': 0.0532543770968914, 'timestamp': '2025-10-01 04:27:48.394768', 'step': 12067, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:48.432686', 'step': 12067, 'epoch': 2} {'type': 'loss', 'content': 0.07862088829278946, 'timestamp': '2025-10-01 04:27:48.457050', 'step': 12068, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:48.492182', 'step': 12068, 'epoch': 2} {'type': 'loss', 'content': 0.08746323734521866, 'timestamp': '2025-10-01 04:27:48.494937', 'step': 12069, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:27:48.533741', 'step': 12069, 'epoch': 2} {'type': 'loss', 'content': 0.1073206290602684, 'timestamp': '2025-10-01 04:27:48.549566', 'step': 12070, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:48.608670', 'step': 12070, 'epoch': 2} {'type': 'loss', 'content': 0.13157916069030762, 'timestamp': '2025-10-01 04:27:48.611578', 'step': 12071, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:48.656914', 'step': 12071, 'epoch': 2} {'type': 'loss', 'content': 0.10907324403524399, 'timestamp': '2025-10-01 04:27:48.681380', 'step': 12072, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:48.729356', 'step': 12072, 'epoch': 2} {'type': 'loss', 'content': 0.20685017108917236, 'timestamp': '2025-10-01 04:27:48.731781', 'step': 12073, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:48.765240', 'step': 12073, 'epoch': 2} {'type': 'loss', 'content': 0.10149163007736206, 'timestamp': '2025-10-01 04:27:48.768258', 'step': 12074, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:27:48.804431', 'step': 12074, 'epoch': 2} {'type': 'loss', 'content': 0.07318121939897537, 'timestamp': '2025-10-01 04:27:48.807874', 'step': 12075, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:27:48.851709', 'step': 12075, 'epoch': 2} {'type': 'loss', 'content': 0.04735768213868141, 'timestamp': '2025-10-01 04:27:48.875619', 'step': 12076, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:27:48.922048', 'step': 12076, 'epoch': 2} {'type': 'loss', 'content': 0.06044236198067665, 'timestamp': '2025-10-01 04:27:48.924570', 'step': 12077, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:27:48.958128', 'step': 12077, 'epoch': 2} {'type': 'loss', 'content': 0.0392686128616333, 'timestamp': '2025-10-01 04:27:48.960422', 'step': 12078, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:27:49.003456', 'step': 12078, 'epoch': 2} {'type': 'loss', 'content': 0.08694323152303696, 'timestamp': '2025-10-01 04:27:49.005762', 'step': 12079, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:49.040881', 'step': 12079, 'epoch': 2} {'type': 'loss', 'content': 0.2131749540567398, 'timestamp': '2025-10-01 04:27:49.065579', 'step': 12080, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:49.105108', 'step': 12080, 'epoch': 2} {'type': 'loss', 'content': 0.08503424376249313, 'timestamp': '2025-10-01 04:27:49.107464', 'step': 12081, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:27:49.146982', 'step': 12081, 'epoch': 2} {'type': 'loss', 'content': 0.10079584270715714, 'timestamp': '2025-10-01 04:27:49.149419', 'step': 12082, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:49.185535', 'step': 12082, 'epoch': 2} {'type': 'loss', 'content': 0.05229950323700905, 'timestamp': '2025-10-01 04:27:49.188634', 'step': 12083, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:27:49.222918', 'step': 12083, 'epoch': 2} {'type': 'loss', 'content': 0.12407205253839493, 'timestamp': '2025-10-01 04:27:49.261771', 'step': 12084, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:27:49.302862', 'step': 12084, 'epoch': 2} {'type': 'loss', 'content': 0.23876547813415527, 'timestamp': '2025-10-01 04:27:49.305296', 'step': 12085, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:27:49.340568', 'step': 12085, 'epoch': 2} {'type': 'loss', 'content': 0.14298850297927856, 'timestamp': '2025-10-01 04:27:49.342888', 'step': 12086, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:27:49.377843', 'step': 12086, 'epoch': 2} {'type': 'loss', 'content': 0.13963720202445984, 'timestamp': '2025-10-01 04:27:49.380518', 'step': 12087, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:49.416458', 'step': 12087, 'epoch': 2} {'type': 'loss', 'content': 0.07642986625432968, 'timestamp': '2025-10-01 04:27:49.454859', 'step': 12088, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:27:49.492260', 'step': 12088, 'epoch': 2} {'type': 'loss', 'content': 0.147798553109169, 'timestamp': '2025-10-01 04:27:49.494784', 'step': 12089, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:27:49.547240', 'step': 12089, 'epoch': 2} {'type': 'loss', 'content': 0.15556056797504425, 'timestamp': '2025-10-01 04:27:49.549656', 'step': 12090, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:27:49.584336', 'step': 12090, 'epoch': 2} {'type': 'loss', 'content': 0.0718744769692421, 'timestamp': '2025-10-01 04:27:49.587313', 'step': 12091, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:27:49.621560', 'step': 12091, 'epoch': 2} {'type': 'loss', 'content': 0.09040823578834534, 'timestamp': '2025-10-01 04:27:49.646903', 'step': 12092, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:49.689552', 'step': 12092, 'epoch': 2} {'type': 'loss', 'content': 0.14191274344921112, 'timestamp': '2025-10-01 04:27:49.692193', 'step': 12093, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:27:49.724452', 'step': 12093, 'epoch': 2} {'type': 'loss', 'content': 0.07357592135667801, 'timestamp': '2025-10-01 04:27:49.727039', 'step': 12094, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:27:49.784662', 'step': 12094, 'epoch': 2} {'type': 'loss', 'content': 0.0825863927602768, 'timestamp': '2025-10-01 04:27:49.787300', 'step': 12095, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:49.832150', 'step': 12095, 'epoch': 2} {'type': 'loss', 'content': 0.05299411341547966, 'timestamp': '2025-10-01 04:27:49.855967', 'step': 12096, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:27:49.901969', 'step': 12096, 'epoch': 2} {'type': 'loss', 'content': 0.1917310357093811, 'timestamp': '2025-10-01 04:27:49.904482', 'step': 12097, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:27:49.954620', 'step': 12097, 'epoch': 2} {'type': 'loss', 'content': 0.13079333305358887, 'timestamp': '2025-10-01 04:27:49.957878', 'step': 12098, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:27:49.997424', 'step': 12098, 'epoch': 2} {'type': 'loss', 'content': 0.16303348541259766, 'timestamp': '2025-10-01 04:27:50.000147', 'step': 12099, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:50.045354', 'step': 12099, 'epoch': 2} {'type': 'loss', 'content': 0.1630392074584961, 'timestamp': '2025-10-01 04:27:50.069131', 'step': 12100, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:27:50.102333', 'step': 12100, 'epoch': 2} {'type': 'loss', 'content': 0.1052544116973877, 'timestamp': '2025-10-01 04:27:50.105086', 'step': 12101, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:27:50.146843', 'step': 12101, 'epoch': 2} {'type': 'loss', 'content': 0.09374009817838669, 'timestamp': '2025-10-01 04:27:50.149919', 'step': 12102, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:27:50.186168', 'step': 12102, 'epoch': 2} {'type': 'loss', 'content': 0.0415683351457119, 'timestamp': '2025-10-01 04:27:50.188763', 'step': 12103, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:27:50.232635', 'step': 12103, 'epoch': 2} {'type': 'loss', 'content': 0.1446862369775772, 'timestamp': '2025-10-01 04:27:50.257651', 'step': 12104, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:50.291334', 'step': 12104, 'epoch': 2} {'type': 'loss', 'content': 0.09922575950622559, 'timestamp': '2025-10-01 04:27:50.294053', 'step': 12105, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:27:50.338578', 'step': 12105, 'epoch': 2} {'type': 'loss', 'content': 0.06976146250963211, 'timestamp': '2025-10-01 04:27:50.341283', 'step': 12106, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:27:50.376239', 'step': 12106, 'epoch': 2} {'type': 'loss', 'content': 0.14365917444229126, 'timestamp': '2025-10-01 04:27:50.378431', 'step': 12107, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:27:50.414987', 'step': 12107, 'epoch': 2} {'type': 'loss', 'content': 0.07969213277101517, 'timestamp': '2025-10-01 04:27:50.439059', 'step': 12108, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:27:50.480415', 'step': 12108, 'epoch': 2} {'type': 'loss', 'content': 0.06220214441418648, 'timestamp': '2025-10-01 04:27:50.482576', 'step': 12109, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:27:50.526996', 'step': 12109, 'epoch': 2} {'type': 'loss', 'content': 0.11257866024971008, 'timestamp': '2025-10-01 04:27:50.529294', 'step': 12110, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:27:50.574017', 'step': 12110, 'epoch': 2} {'type': 'loss', 'content': 0.20109407603740692, 'timestamp': '2025-10-01 04:27:50.576182', 'step': 12111, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:27:50.631366', 'step': 12111, 'epoch': 2} {'type': 'loss', 'content': 0.04336325451731682, 'timestamp': '2025-10-01 04:27:50.654985', 'step': 12112, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:27:50.692897', 'step': 12112, 'epoch': 2} {'type': 'loss', 'content': 0.08526629209518433, 'timestamp': '2025-10-01 04:27:50.695324', 'step': 12113, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:27:50.741169', 'step': 12113, 'epoch': 2} {'type': 'loss', 'content': 0.2146700620651245, 'timestamp': '2025-10-01 04:27:50.743415', 'step': 12114, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:27:50.789359', 'step': 12114, 'epoch': 2} {'type': 'loss', 'content': 0.11874638497829437, 'timestamp': '2025-10-01 04:27:50.791662', 'step': 12115, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:27:50.823809', 'step': 12115, 'epoch': 2} {'type': 'loss', 'content': 0.05291512981057167, 'timestamp': '2025-10-01 04:27:50.847452', 'step': 12116, 'epoch': 2} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 949202279808}], 'timestamp': '2025-10-01 04:28:03.410441', 'step': 12116, 'epoch': 2} {'type': 'pplx', 'content': 12889.917170412302, 'timestamp': '2025-10-01 04:28:03.414330', 'step': 12116, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:03.446384', 'step': 12116, 'epoch': 2} {'type': 'loss', 'content': 0.0813283771276474, 'timestamp': '2025-10-01 04:28:03.448466', 'step': 12117, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:03.484436', 'step': 12117, 'epoch': 2} {'type': 'loss', 'content': 0.11177565902471542, 'timestamp': '2025-10-01 04:28:03.486757', 'step': 12118, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:28:03.519828', 'step': 12118, 'epoch': 2} {'type': 'loss', 'content': 0.09188227355480194, 'timestamp': '2025-10-01 04:28:03.522491', 'step': 12119, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:03.562396', 'step': 12119, 'epoch': 2} {'type': 'loss', 'content': 0.15154579281806946, 'timestamp': '2025-10-01 04:28:03.586129', 'step': 12120, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:03.627792', 'step': 12120, 'epoch': 2} {'type': 'loss', 'content': 0.05520252510905266, 'timestamp': '2025-10-01 04:28:03.629895', 'step': 12121, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:03.675557', 'step': 12121, 'epoch': 2} {'type': 'loss', 'content': 0.08794409036636353, 'timestamp': '2025-10-01 04:28:03.683876', 'step': 12122, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:03.718984', 'step': 12122, 'epoch': 2} {'type': 'loss', 'content': 0.1153707355260849, 'timestamp': '2025-10-01 04:28:03.721269', 'step': 12123, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:03.756108', 'step': 12123, 'epoch': 2} {'type': 'loss', 'content': 0.12575915455818176, 'timestamp': '2025-10-01 04:28:03.781505', 'step': 12124, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:03.841803', 'step': 12124, 'epoch': 2} {'type': 'loss', 'content': 0.07011494785547256, 'timestamp': '2025-10-01 04:28:03.843892', 'step': 12125, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:03.880108', 'step': 12125, 'epoch': 2} {'type': 'loss', 'content': 0.1176309883594513, 'timestamp': '2025-10-01 04:28:03.882340', 'step': 12126, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:28:03.915263', 'step': 12126, 'epoch': 2} {'type': 'loss', 'content': 0.06649105995893478, 'timestamp': '2025-10-01 04:28:03.918650', 'step': 12127, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:03.966936', 'step': 12127, 'epoch': 2} {'type': 'loss', 'content': 0.13983023166656494, 'timestamp': '2025-10-01 04:28:03.991146', 'step': 12128, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:04.027892', 'step': 12128, 'epoch': 2} {'type': 'loss', 'content': 0.10221147537231445, 'timestamp': '2025-10-01 04:28:04.031310', 'step': 12129, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:28:04.077533', 'step': 12129, 'epoch': 2} {'type': 'loss', 'content': 0.15566153824329376, 'timestamp': '2025-10-01 04:28:04.080068', 'step': 12130, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:04.116115', 'step': 12130, 'epoch': 2} {'type': 'loss', 'content': 0.15040329098701477, 'timestamp': '2025-10-01 04:28:04.118518', 'step': 12131, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:04.165350', 'step': 12131, 'epoch': 2} {'type': 'loss', 'content': 0.19624851644039154, 'timestamp': '2025-10-01 04:28:04.189161', 'step': 12132, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:28:04.224481', 'step': 12132, 'epoch': 2} {'type': 'loss', 'content': 0.11611633747816086, 'timestamp': '2025-10-01 04:28:04.226607', 'step': 12133, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:04.261445', 'step': 12133, 'epoch': 2} {'type': 'loss', 'content': 0.12343937158584595, 'timestamp': '2025-10-01 04:28:04.263537', 'step': 12134, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:28:04.314054', 'step': 12134, 'epoch': 2} {'type': 'loss', 'content': 0.13742844760417938, 'timestamp': '2025-10-01 04:28:04.316699', 'step': 12135, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:04.352705', 'step': 12135, 'epoch': 2} {'type': 'loss', 'content': 0.12199237197637558, 'timestamp': '2025-10-01 04:28:04.376385', 'step': 12136, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:04.423330', 'step': 12136, 'epoch': 2} {'type': 'loss', 'content': 0.1421026587486267, 'timestamp': '2025-10-01 04:28:04.425440', 'step': 12137, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:04.458206', 'step': 12137, 'epoch': 2} {'type': 'loss', 'content': 0.1047518402338028, 'timestamp': '2025-10-01 04:28:04.460075', 'step': 12138, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:04.509450', 'step': 12138, 'epoch': 2} {'type': 'loss', 'content': 0.16580498218536377, 'timestamp': '2025-10-01 04:28:04.511609', 'step': 12139, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:04.543845', 'step': 12139, 'epoch': 2} {'type': 'loss', 'content': 0.09286908060312271, 'timestamp': '2025-10-01 04:28:04.567541', 'step': 12140, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:04.602971', 'step': 12140, 'epoch': 2} {'type': 'loss', 'content': 0.0934101939201355, 'timestamp': '2025-10-01 04:28:04.605193', 'step': 12141, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:04.640040', 'step': 12141, 'epoch': 2} {'type': 'loss', 'content': 0.13514606654644012, 'timestamp': '2025-10-01 04:28:04.642351', 'step': 12142, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:04.678422', 'step': 12142, 'epoch': 2} {'type': 'loss', 'content': 0.03386217728257179, 'timestamp': '2025-10-01 04:28:04.680747', 'step': 12143, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:04.715105', 'step': 12143, 'epoch': 2} {'type': 'loss', 'content': 0.12451260536909103, 'timestamp': '2025-10-01 04:28:04.738667', 'step': 12144, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:04.771525', 'step': 12144, 'epoch': 2} {'type': 'loss', 'content': 0.0914921760559082, 'timestamp': '2025-10-01 04:28:04.773679', 'step': 12145, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:04.807853', 'step': 12145, 'epoch': 2} {'type': 'loss', 'content': 0.18327508866786957, 'timestamp': '2025-10-01 04:28:04.810481', 'step': 12146, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:28:04.842909', 'step': 12146, 'epoch': 2} {'type': 'loss', 'content': 0.10326600074768066, 'timestamp': '2025-10-01 04:28:04.845577', 'step': 12147, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:04.879965', 'step': 12147, 'epoch': 2} {'type': 'loss', 'content': 0.08508595824241638, 'timestamp': '2025-10-01 04:28:04.903740', 'step': 12148, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:04.954090', 'step': 12148, 'epoch': 2} {'type': 'loss', 'content': 0.04565657302737236, 'timestamp': '2025-10-01 04:28:04.956282', 'step': 12149, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:28:04.991004', 'step': 12149, 'epoch': 2} {'type': 'loss', 'content': 0.05165825039148331, 'timestamp': '2025-10-01 04:28:04.993631', 'step': 12150, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:05.037587', 'step': 12150, 'epoch': 2} {'type': 'loss', 'content': 0.12638212740421295, 'timestamp': '2025-10-01 04:28:05.039758', 'step': 12151, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:05.085292', 'step': 12151, 'epoch': 2} {'type': 'loss', 'content': 0.05898251384496689, 'timestamp': '2025-10-01 04:28:05.109044', 'step': 12152, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:05.155544', 'step': 12152, 'epoch': 2} {'type': 'loss', 'content': 0.07723794132471085, 'timestamp': '2025-10-01 04:28:05.157691', 'step': 12153, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:05.193841', 'step': 12153, 'epoch': 2} {'type': 'loss', 'content': 0.13189001381397247, 'timestamp': '2025-10-01 04:28:05.195992', 'step': 12154, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:05.242316', 'step': 12154, 'epoch': 2} {'type': 'loss', 'content': 0.07027862221002579, 'timestamp': '2025-10-01 04:28:05.245160', 'step': 12155, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:28:05.280436', 'step': 12155, 'epoch': 2} {'type': 'loss', 'content': 0.09669231623411179, 'timestamp': '2025-10-01 04:28:05.305920', 'step': 12156, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:05.341045', 'step': 12156, 'epoch': 2} {'type': 'loss', 'content': 0.054131098091602325, 'timestamp': '2025-10-01 04:28:05.343291', 'step': 12157, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:28:05.389453', 'step': 12157, 'epoch': 2} {'type': 'loss', 'content': 0.1805487424135208, 'timestamp': '2025-10-01 04:28:05.391743', 'step': 12158, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:05.424217', 'step': 12158, 'epoch': 2} {'type': 'loss', 'content': 0.10442349314689636, 'timestamp': '2025-10-01 04:28:05.426388', 'step': 12159, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:05.466235', 'step': 12159, 'epoch': 2} {'type': 'loss', 'content': 0.2383364588022232, 'timestamp': '2025-10-01 04:28:05.489978', 'step': 12160, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:05.525501', 'step': 12160, 'epoch': 2} {'type': 'loss', 'content': 0.11122739315032959, 'timestamp': '2025-10-01 04:28:05.527631', 'step': 12161, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:05.573680', 'step': 12161, 'epoch': 2} {'type': 'loss', 'content': 0.06895022839307785, 'timestamp': '2025-10-01 04:28:05.575670', 'step': 12162, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:05.616425', 'step': 12162, 'epoch': 2} {'type': 'loss', 'content': 0.08724350482225418, 'timestamp': '2025-10-01 04:28:05.618543', 'step': 12163, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:05.651121', 'step': 12163, 'epoch': 2} {'type': 'loss', 'content': 0.1564674824476242, 'timestamp': '2025-10-01 04:28:05.674761', 'step': 12164, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:05.710111', 'step': 12164, 'epoch': 2} {'type': 'loss', 'content': 0.08136074244976044, 'timestamp': '2025-10-01 04:28:05.712113', 'step': 12165, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:05.748495', 'step': 12165, 'epoch': 2} {'type': 'loss', 'content': 0.10546018183231354, 'timestamp': '2025-10-01 04:28:05.750641', 'step': 12166, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:05.783497', 'step': 12166, 'epoch': 2} {'type': 'loss', 'content': 0.07402072846889496, 'timestamp': '2025-10-01 04:28:05.785565', 'step': 12167, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:05.817902', 'step': 12167, 'epoch': 2} {'type': 'loss', 'content': 0.13458603620529175, 'timestamp': '2025-10-01 04:28:05.841493', 'step': 12168, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:28:05.886787', 'step': 12168, 'epoch': 2} {'type': 'loss', 'content': 0.059697091579437256, 'timestamp': '2025-10-01 04:28:05.888925', 'step': 12169, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:05.922385', 'step': 12169, 'epoch': 2} {'type': 'loss', 'content': 0.09971689432859421, 'timestamp': '2025-10-01 04:28:05.924592', 'step': 12170, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:05.968612', 'step': 12170, 'epoch': 2} {'type': 'loss', 'content': 0.1363486796617508, 'timestamp': '2025-10-01 04:28:05.970902', 'step': 12171, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:06.005441', 'step': 12171, 'epoch': 2} {'type': 'loss', 'content': 0.13029001653194427, 'timestamp': '2025-10-01 04:28:06.030054', 'step': 12172, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:06.064494', 'step': 12172, 'epoch': 2} {'type': 'loss', 'content': 0.1854124814271927, 'timestamp': '2025-10-01 04:28:06.066747', 'step': 12173, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:06.101046', 'step': 12173, 'epoch': 2} {'type': 'loss', 'content': 0.2899629771709442, 'timestamp': '2025-10-01 04:28:06.103159', 'step': 12174, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:06.141713', 'step': 12174, 'epoch': 2} {'type': 'loss', 'content': 0.1969829499721527, 'timestamp': '2025-10-01 04:28:06.144423', 'step': 12175, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:06.178555', 'step': 12175, 'epoch': 2} {'type': 'loss', 'content': 0.15988823771476746, 'timestamp': '2025-10-01 04:28:06.202619', 'step': 12176, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:06.234957', 'step': 12176, 'epoch': 2} {'type': 'loss', 'content': 0.12803879380226135, 'timestamp': '2025-10-01 04:28:06.237404', 'step': 12177, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:06.283568', 'step': 12177, 'epoch': 2} {'type': 'loss', 'content': 0.10313145816326141, 'timestamp': '2025-10-01 04:28:06.286819', 'step': 12178, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:28:06.319131', 'step': 12178, 'epoch': 2} {'type': 'loss', 'content': 0.13987107574939728, 'timestamp': '2025-10-01 04:28:06.321765', 'step': 12179, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:06.356046', 'step': 12179, 'epoch': 2} {'type': 'loss', 'content': 0.036640603095293045, 'timestamp': '2025-10-01 04:28:06.385861', 'step': 12180, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:06.419472', 'step': 12180, 'epoch': 2} {'type': 'loss', 'content': 0.19249093532562256, 'timestamp': '2025-10-01 04:28:06.426081', 'step': 12181, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:28:06.458202', 'step': 12181, 'epoch': 2} {'type': 'loss', 'content': 0.08004650473594666, 'timestamp': '2025-10-01 04:28:06.461766', 'step': 12182, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:06.504717', 'step': 12182, 'epoch': 2} {'type': 'loss', 'content': 0.06443022191524506, 'timestamp': '2025-10-01 04:28:06.507421', 'step': 12183, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:06.542398', 'step': 12183, 'epoch': 2} {'type': 'loss', 'content': 0.04410473257303238, 'timestamp': '2025-10-01 04:28:06.566223', 'step': 12184, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:06.602049', 'step': 12184, 'epoch': 2} {'type': 'loss', 'content': 0.05399578437209129, 'timestamp': '2025-10-01 04:28:06.605012', 'step': 12185, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:06.643623', 'step': 12185, 'epoch': 2} {'type': 'loss', 'content': 0.24518559873104095, 'timestamp': '2025-10-01 04:28:06.646705', 'step': 12186, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:06.679737', 'step': 12186, 'epoch': 2} {'type': 'loss', 'content': 0.18167917430400848, 'timestamp': '2025-10-01 04:28:06.682056', 'step': 12187, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:28:06.722559', 'step': 12187, 'epoch': 2} {'type': 'loss', 'content': 0.19553473591804504, 'timestamp': '2025-10-01 04:28:06.746224', 'step': 12188, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:06.778671', 'step': 12188, 'epoch': 2} {'type': 'loss', 'content': 0.04703439027070999, 'timestamp': '2025-10-01 04:28:06.781263', 'step': 12189, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:06.832999', 'step': 12189, 'epoch': 2} {'type': 'loss', 'content': 0.048742447048425674, 'timestamp': '2025-10-01 04:28:06.835498', 'step': 12190, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:06.868902', 'step': 12190, 'epoch': 2} {'type': 'loss', 'content': 0.10904560983181, 'timestamp': '2025-10-01 04:28:06.871424', 'step': 12191, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:06.904384', 'step': 12191, 'epoch': 2} {'type': 'loss', 'content': 0.10831956565380096, 'timestamp': '2025-10-01 04:28:06.928157', 'step': 12192, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:06.961013', 'step': 12192, 'epoch': 2} {'type': 'loss', 'content': 0.11479879915714264, 'timestamp': '2025-10-01 04:28:06.974682', 'step': 12193, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:07.009340', 'step': 12193, 'epoch': 2} {'type': 'loss', 'content': 0.08043335378170013, 'timestamp': '2025-10-01 04:28:07.011639', 'step': 12194, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:28:07.053834', 'step': 12194, 'epoch': 2} {'type': 'loss', 'content': 0.2080952376127243, 'timestamp': '2025-10-01 04:28:07.056497', 'step': 12195, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:07.100176', 'step': 12195, 'epoch': 2} {'type': 'loss', 'content': 0.10382253676652908, 'timestamp': '2025-10-01 04:28:07.124175', 'step': 12196, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:07.157341', 'step': 12196, 'epoch': 2} {'type': 'loss', 'content': 0.04850747063755989, 'timestamp': '2025-10-01 04:28:07.159862', 'step': 12197, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:07.207553', 'step': 12197, 'epoch': 2} {'type': 'loss', 'content': 0.11248404532670975, 'timestamp': '2025-10-01 04:28:07.210042', 'step': 12198, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:07.242076', 'step': 12198, 'epoch': 2} {'type': 'loss', 'content': 0.0691995844244957, 'timestamp': '2025-10-01 04:28:07.245352', 'step': 12199, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:07.286523', 'step': 12199, 'epoch': 2} {'type': 'loss', 'content': 0.09432046115398407, 'timestamp': '2025-10-01 04:28:07.310925', 'step': 12200, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:07.345199', 'step': 12200, 'epoch': 2} {'type': 'loss', 'content': 0.14717069268226624, 'timestamp': '2025-10-01 04:28:07.347568', 'step': 12201, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:07.380229', 'step': 12201, 'epoch': 2} {'type': 'loss', 'content': 0.04734164848923683, 'timestamp': '2025-10-01 04:28:07.382892', 'step': 12202, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:07.433315', 'step': 12202, 'epoch': 2} {'type': 'loss', 'content': 0.12511298060417175, 'timestamp': '2025-10-01 04:28:07.435980', 'step': 12203, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:28:07.469375', 'step': 12203, 'epoch': 2} {'type': 'loss', 'content': 0.022546693682670593, 'timestamp': '2025-10-01 04:28:07.494103', 'step': 12204, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:28:07.542395', 'step': 12204, 'epoch': 2} {'type': 'loss', 'content': 0.08037591725587845, 'timestamp': '2025-10-01 04:28:07.544730', 'step': 12205, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:07.586550', 'step': 12205, 'epoch': 2} {'type': 'loss', 'content': 0.22638970613479614, 'timestamp': '2025-10-01 04:28:07.589861', 'step': 12206, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:07.624942', 'step': 12206, 'epoch': 2} {'type': 'loss', 'content': 0.10697154700756073, 'timestamp': '2025-10-01 04:28:07.627277', 'step': 12207, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:07.659541', 'step': 12207, 'epoch': 2} {'type': 'loss', 'content': 0.06496885418891907, 'timestamp': '2025-10-01 04:28:07.683678', 'step': 12208, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:28:07.715759', 'step': 12208, 'epoch': 2} {'type': 'loss', 'content': 0.12519404292106628, 'timestamp': '2025-10-01 04:28:07.719880', 'step': 12209, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:07.777677', 'step': 12209, 'epoch': 2} {'type': 'loss', 'content': 0.07509076595306396, 'timestamp': '2025-10-01 04:28:07.779776', 'step': 12210, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:07.823640', 'step': 12210, 'epoch': 2} {'type': 'loss', 'content': 0.1609848588705063, 'timestamp': '2025-10-01 04:28:07.825540', 'step': 12211, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:28:07.860888', 'step': 12211, 'epoch': 2} {'type': 'loss', 'content': 0.1275007426738739, 'timestamp': '2025-10-01 04:28:07.886013', 'step': 12212, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:07.920012', 'step': 12212, 'epoch': 2} {'type': 'loss', 'content': 0.1566890925168991, 'timestamp': '2025-10-01 04:28:07.922133', 'step': 12213, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:07.976532', 'step': 12213, 'epoch': 2} {'type': 'loss', 'content': 0.1717882752418518, 'timestamp': '2025-10-01 04:28:07.978882', 'step': 12214, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:08.014626', 'step': 12214, 'epoch': 2} {'type': 'loss', 'content': 0.12991204857826233, 'timestamp': '2025-10-01 04:28:08.016983', 'step': 12215, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:08.065677', 'step': 12215, 'epoch': 2} {'type': 'loss', 'content': 0.1461043655872345, 'timestamp': '2025-10-01 04:28:08.089222', 'step': 12216, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:08.140316', 'step': 12216, 'epoch': 2} {'type': 'loss', 'content': 0.12423486262559891, 'timestamp': '2025-10-01 04:28:08.142291', 'step': 12217, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:08.188620', 'step': 12217, 'epoch': 2} {'type': 'loss', 'content': 0.09080693870782852, 'timestamp': '2025-10-01 04:28:08.190522', 'step': 12218, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:28:08.223517', 'step': 12218, 'epoch': 2} {'type': 'loss', 'content': 0.1274254322052002, 'timestamp': '2025-10-01 04:28:08.226898', 'step': 12219, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:08.266702', 'step': 12219, 'epoch': 2} {'type': 'loss', 'content': 0.08964549750089645, 'timestamp': '2025-10-01 04:28:08.290089', 'step': 12220, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:08.324362', 'step': 12220, 'epoch': 2} {'type': 'loss', 'content': 0.1306690275669098, 'timestamp': '2025-10-01 04:28:08.326455', 'step': 12221, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:08.386096', 'step': 12221, 'epoch': 2} {'type': 'loss', 'content': 0.03311010077595711, 'timestamp': '2025-10-01 04:28:08.388457', 'step': 12222, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:08.421562', 'step': 12222, 'epoch': 2} {'type': 'loss', 'content': 0.039104677736759186, 'timestamp': '2025-10-01 04:28:08.424569', 'step': 12223, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:28:08.460481', 'step': 12223, 'epoch': 2} {'type': 'loss', 'content': 0.09571054577827454, 'timestamp': '2025-10-01 04:28:08.484173', 'step': 12224, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:08.516333', 'step': 12224, 'epoch': 2} {'type': 'loss', 'content': 0.0943426564335823, 'timestamp': '2025-10-01 04:28:08.518459', 'step': 12225, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:28:08.565216', 'step': 12225, 'epoch': 2} {'type': 'loss', 'content': 0.2513485550880432, 'timestamp': '2025-10-01 04:28:08.567680', 'step': 12226, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:08.608790', 'step': 12226, 'epoch': 2} {'type': 'loss', 'content': 0.0800776332616806, 'timestamp': '2025-10-01 04:28:08.610990', 'step': 12227, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:28:08.660862', 'step': 12227, 'epoch': 2} {'type': 'loss', 'content': 0.14054454863071442, 'timestamp': '2025-10-01 04:28:08.684407', 'step': 12228, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:08.717055', 'step': 12228, 'epoch': 2} {'type': 'loss', 'content': 0.08782752603292465, 'timestamp': '2025-10-01 04:28:08.719268', 'step': 12229, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:08.751934', 'step': 12229, 'epoch': 2} {'type': 'loss', 'content': 0.15375767648220062, 'timestamp': '2025-10-01 04:28:08.754482', 'step': 12230, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:08.786399', 'step': 12230, 'epoch': 2} {'type': 'loss', 'content': 0.05248008295893669, 'timestamp': '2025-10-01 04:28:08.788977', 'step': 12231, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:08.826023', 'step': 12231, 'epoch': 2} {'type': 'loss', 'content': 0.04953557997941971, 'timestamp': '2025-10-01 04:28:08.853632', 'step': 12232, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:08.886932', 'step': 12232, 'epoch': 2} {'type': 'loss', 'content': 0.15483593940734863, 'timestamp': '2025-10-01 04:28:08.889043', 'step': 12233, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:08.948318', 'step': 12233, 'epoch': 2} {'type': 'loss', 'content': 0.14937099814414978, 'timestamp': '2025-10-01 04:28:08.950696', 'step': 12234, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:08.985531', 'step': 12234, 'epoch': 2} {'type': 'loss', 'content': 0.1447368860244751, 'timestamp': '2025-10-01 04:28:08.988332', 'step': 12235, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:09.021909', 'step': 12235, 'epoch': 2} {'type': 'loss', 'content': 0.05515371635556221, 'timestamp': '2025-10-01 04:28:09.045529', 'step': 12236, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:09.089456', 'step': 12236, 'epoch': 2} {'type': 'loss', 'content': 0.10695469379425049, 'timestamp': '2025-10-01 04:28:09.091534', 'step': 12237, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:09.133475', 'step': 12237, 'epoch': 2} {'type': 'loss', 'content': 0.07145167887210846, 'timestamp': '2025-10-01 04:28:09.135291', 'step': 12238, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:09.170724', 'step': 12238, 'epoch': 2} {'type': 'loss', 'content': 0.0963563323020935, 'timestamp': '2025-10-01 04:28:09.172943', 'step': 12239, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:09.204313', 'step': 12239, 'epoch': 2} {'type': 'loss', 'content': 0.09758757054805756, 'timestamp': '2025-10-01 04:28:09.228079', 'step': 12240, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:28:09.261977', 'step': 12240, 'epoch': 2} {'type': 'loss', 'content': 0.2039705514907837, 'timestamp': '2025-10-01 04:28:09.263906', 'step': 12241, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:09.312006', 'step': 12241, 'epoch': 2} {'type': 'loss', 'content': 0.1905316263437271, 'timestamp': '2025-10-01 04:28:09.314143', 'step': 12242, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:09.356364', 'step': 12242, 'epoch': 2} {'type': 'loss', 'content': 0.07220709323883057, 'timestamp': '2025-10-01 04:28:09.358592', 'step': 12243, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:09.391616', 'step': 12243, 'epoch': 2} {'type': 'loss', 'content': 0.23920932412147522, 'timestamp': '2025-10-01 04:28:09.415383', 'step': 12244, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:09.452174', 'step': 12244, 'epoch': 2} {'type': 'loss', 'content': 0.08846762776374817, 'timestamp': '2025-10-01 04:28:09.468698', 'step': 12245, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:09.513660', 'step': 12245, 'epoch': 2} {'type': 'loss', 'content': 0.08715906739234924, 'timestamp': '2025-10-01 04:28:09.515994', 'step': 12246, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:09.551180', 'step': 12246, 'epoch': 2} {'type': 'loss', 'content': 0.06336256116628647, 'timestamp': '2025-10-01 04:28:09.564784', 'step': 12247, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:09.596561', 'step': 12247, 'epoch': 2} {'type': 'loss', 'content': 0.059002917259931564, 'timestamp': '2025-10-01 04:28:09.625640', 'step': 12248, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:09.659915', 'step': 12248, 'epoch': 2} {'type': 'loss', 'content': 0.1103440374135971, 'timestamp': '2025-10-01 04:28:09.662002', 'step': 12249, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:09.698483', 'step': 12249, 'epoch': 2} {'type': 'loss', 'content': 0.12497326731681824, 'timestamp': '2025-10-01 04:28:09.700360', 'step': 12250, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:09.734796', 'step': 12250, 'epoch': 2} {'type': 'loss', 'content': 0.07887904345989227, 'timestamp': '2025-10-01 04:28:09.737171', 'step': 12251, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:28:09.771188', 'step': 12251, 'epoch': 2} {'type': 'loss', 'content': 0.20997768640518188, 'timestamp': '2025-10-01 04:28:09.796492', 'step': 12252, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:09.830897', 'step': 12252, 'epoch': 2} {'type': 'loss', 'content': 0.18244314193725586, 'timestamp': '2025-10-01 04:28:09.844943', 'step': 12253, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:09.886778', 'step': 12253, 'epoch': 2} {'type': 'loss', 'content': 0.10934209823608398, 'timestamp': '2025-10-01 04:28:09.888661', 'step': 12254, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:28:09.919979', 'step': 12254, 'epoch': 2} {'type': 'loss', 'content': 0.11151215434074402, 'timestamp': '2025-10-01 04:28:09.922609', 'step': 12255, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:09.954512', 'step': 12255, 'epoch': 2} {'type': 'loss', 'content': 0.08501899242401123, 'timestamp': '2025-10-01 04:28:09.990853', 'step': 12256, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:28:10.027231', 'step': 12256, 'epoch': 2} {'type': 'loss', 'content': 0.13779400289058685, 'timestamp': '2025-10-01 04:28:10.029353', 'step': 12257, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:10.067620', 'step': 12257, 'epoch': 2} {'type': 'loss', 'content': 0.12975622713565826, 'timestamp': '2025-10-01 04:28:10.069682', 'step': 12258, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:10.115151', 'step': 12258, 'epoch': 2} {'type': 'loss', 'content': 0.08702811598777771, 'timestamp': '2025-10-01 04:28:10.117133', 'step': 12259, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:10.156580', 'step': 12259, 'epoch': 2} {'type': 'loss', 'content': 0.07618766278028488, 'timestamp': '2025-10-01 04:28:10.180219', 'step': 12260, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:10.222091', 'step': 12260, 'epoch': 2} {'type': 'loss', 'content': 0.06722904741764069, 'timestamp': '2025-10-01 04:28:10.224062', 'step': 12261, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:10.265898', 'step': 12261, 'epoch': 2} {'type': 'loss', 'content': 0.1210305318236351, 'timestamp': '2025-10-01 04:28:10.268634', 'step': 12262, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:10.322583', 'step': 12262, 'epoch': 2} {'type': 'loss', 'content': 0.18556353449821472, 'timestamp': '2025-10-01 04:28:10.333879', 'step': 12263, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:10.366609', 'step': 12263, 'epoch': 2} {'type': 'loss', 'content': 0.12786054611206055, 'timestamp': '2025-10-01 04:28:10.390529', 'step': 12264, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:10.426654', 'step': 12264, 'epoch': 2} {'type': 'loss', 'content': 0.23764343559741974, 'timestamp': '2025-10-01 04:28:10.428909', 'step': 12265, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:10.475688', 'step': 12265, 'epoch': 2} {'type': 'loss', 'content': 0.11255928128957748, 'timestamp': '2025-10-01 04:28:10.478106', 'step': 12266, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:10.529312', 'step': 12266, 'epoch': 2} {'type': 'loss', 'content': 0.042381685227155685, 'timestamp': '2025-10-01 04:28:10.533767', 'step': 12267, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:10.566553', 'step': 12267, 'epoch': 2} {'type': 'loss', 'content': 0.13323535025119781, 'timestamp': '2025-10-01 04:28:10.590330', 'step': 12268, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:10.626894', 'step': 12268, 'epoch': 2} {'type': 'loss', 'content': 0.08392493426799774, 'timestamp': '2025-10-01 04:28:10.628772', 'step': 12269, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:28:10.666342', 'step': 12269, 'epoch': 2} {'type': 'loss', 'content': 0.1549723744392395, 'timestamp': '2025-10-01 04:28:10.669365', 'step': 12270, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:10.714858', 'step': 12270, 'epoch': 2} {'type': 'loss', 'content': 0.11313986033201218, 'timestamp': '2025-10-01 04:28:10.717080', 'step': 12271, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:10.753352', 'step': 12271, 'epoch': 2} {'type': 'loss', 'content': 0.12866072356700897, 'timestamp': '2025-10-01 04:28:10.777125', 'step': 12272, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:10.813212', 'step': 12272, 'epoch': 2} {'type': 'loss', 'content': 0.14555668830871582, 'timestamp': '2025-10-01 04:28:10.818175', 'step': 12273, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:10.858797', 'step': 12273, 'epoch': 2} {'type': 'loss', 'content': 0.1193024143576622, 'timestamp': '2025-10-01 04:28:10.863714', 'step': 12274, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:10.899887', 'step': 12274, 'epoch': 2} {'type': 'loss', 'content': 0.07748214900493622, 'timestamp': '2025-10-01 04:28:10.902380', 'step': 12275, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:10.948990', 'step': 12275, 'epoch': 2} {'type': 'loss', 'content': 0.0703808143734932, 'timestamp': '2025-10-01 04:28:10.972601', 'step': 12276, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:11.020264', 'step': 12276, 'epoch': 2} {'type': 'loss', 'content': 0.13141527771949768, 'timestamp': '2025-10-01 04:28:11.024582', 'step': 12277, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:11.061663', 'step': 12277, 'epoch': 2} {'type': 'loss', 'content': 0.09583986550569534, 'timestamp': '2025-10-01 04:28:11.064057', 'step': 12278, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:11.102856', 'step': 12278, 'epoch': 2} {'type': 'loss', 'content': 0.07198343425989151, 'timestamp': '2025-10-01 04:28:11.110045', 'step': 12279, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:28:11.157565', 'step': 12279, 'epoch': 2} {'type': 'loss', 'content': 0.06966718286275864, 'timestamp': '2025-10-01 04:28:11.181125', 'step': 12280, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:11.211684', 'step': 12280, 'epoch': 2} {'type': 'loss', 'content': 0.03270600363612175, 'timestamp': '2025-10-01 04:28:11.214330', 'step': 12281, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:11.251115', 'step': 12281, 'epoch': 2} {'type': 'loss', 'content': 0.1610046625137329, 'timestamp': '2025-10-01 04:28:11.253185', 'step': 12282, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:28:11.288635', 'step': 12282, 'epoch': 2} {'type': 'loss', 'content': 0.011131753213703632, 'timestamp': '2025-10-01 04:28:11.291443', 'step': 12283, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:11.328827', 'step': 12283, 'epoch': 2} {'type': 'loss', 'content': 0.04768596962094307, 'timestamp': '2025-10-01 04:28:11.352511', 'step': 12284, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:11.388724', 'step': 12284, 'epoch': 2} {'type': 'loss', 'content': 0.06154707446694374, 'timestamp': '2025-10-01 04:28:11.390886', 'step': 12285, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:11.425812', 'step': 12285, 'epoch': 2} {'type': 'loss', 'content': 0.08154813945293427, 'timestamp': '2025-10-01 04:28:11.429896', 'step': 12286, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:11.469747', 'step': 12286, 'epoch': 2} {'type': 'loss', 'content': 0.031106863170862198, 'timestamp': '2025-10-01 04:28:11.471845', 'step': 12287, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:11.504750', 'step': 12287, 'epoch': 2} {'type': 'loss', 'content': 0.05339212715625763, 'timestamp': '2025-10-01 04:28:11.528189', 'step': 12288, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:11.570735', 'step': 12288, 'epoch': 2} {'type': 'loss', 'content': 0.0811455026268959, 'timestamp': '2025-10-01 04:28:11.572578', 'step': 12289, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:11.614208', 'step': 12289, 'epoch': 2} {'type': 'loss', 'content': 0.0923311859369278, 'timestamp': '2025-10-01 04:28:11.616297', 'step': 12290, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:11.656057', 'step': 12290, 'epoch': 2} {'type': 'loss', 'content': 0.10179900377988815, 'timestamp': '2025-10-01 04:28:11.658277', 'step': 12291, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:11.694870', 'step': 12291, 'epoch': 2} {'type': 'loss', 'content': 0.13555464148521423, 'timestamp': '2025-10-01 04:28:11.718680', 'step': 12292, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:28:11.754725', 'step': 12292, 'epoch': 2} {'type': 'loss', 'content': 0.05736055225133896, 'timestamp': '2025-10-01 04:28:11.756947', 'step': 12293, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:11.795657', 'step': 12293, 'epoch': 2} {'type': 'loss', 'content': 0.07045940309762955, 'timestamp': '2025-10-01 04:28:11.797794', 'step': 12294, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:11.834956', 'step': 12294, 'epoch': 2} {'type': 'loss', 'content': 0.04875199496746063, 'timestamp': '2025-10-01 04:28:11.837424', 'step': 12295, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:11.877133', 'step': 12295, 'epoch': 2} {'type': 'loss', 'content': 0.10957121104001999, 'timestamp': '2025-10-01 04:28:11.900627', 'step': 12296, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:11.939082', 'step': 12296, 'epoch': 2} {'type': 'loss', 'content': 0.048721764236688614, 'timestamp': '2025-10-01 04:28:11.941200', 'step': 12297, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:11.979741', 'step': 12297, 'epoch': 2} {'type': 'loss', 'content': 0.11058249324560165, 'timestamp': '2025-10-01 04:28:11.981826', 'step': 12298, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:12.028210', 'step': 12298, 'epoch': 2} {'type': 'loss', 'content': 0.1630241870880127, 'timestamp': '2025-10-01 04:28:12.030273', 'step': 12299, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:12.063034', 'step': 12299, 'epoch': 2} {'type': 'loss', 'content': 0.08588124066591263, 'timestamp': '2025-10-01 04:28:12.086447', 'step': 12300, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:28:12.145282', 'step': 12300, 'epoch': 2} {'type': 'loss', 'content': 0.1275549679994583, 'timestamp': '2025-10-01 04:28:12.147422', 'step': 12301, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:12.187274', 'step': 12301, 'epoch': 2} {'type': 'loss', 'content': 0.10202158987522125, 'timestamp': '2025-10-01 04:28:12.190253', 'step': 12302, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:28:12.228528', 'step': 12302, 'epoch': 2} {'type': 'loss', 'content': 0.17348423600196838, 'timestamp': '2025-10-01 04:28:12.231136', 'step': 12303, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:12.274499', 'step': 12303, 'epoch': 2} {'type': 'loss', 'content': 0.08521920442581177, 'timestamp': '2025-10-01 04:28:12.298328', 'step': 12304, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:12.332141', 'step': 12304, 'epoch': 2} {'type': 'loss', 'content': 0.13088162243366241, 'timestamp': '2025-10-01 04:28:12.337102', 'step': 12305, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:12.369194', 'step': 12305, 'epoch': 2} {'type': 'loss', 'content': 0.05725889280438423, 'timestamp': '2025-10-01 04:28:12.371360', 'step': 12306, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:12.406477', 'step': 12306, 'epoch': 2} {'type': 'loss', 'content': 0.12099102139472961, 'timestamp': '2025-10-01 04:28:12.408506', 'step': 12307, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:12.440718', 'step': 12307, 'epoch': 2} {'type': 'loss', 'content': 0.10619713366031647, 'timestamp': '2025-10-01 04:28:12.464036', 'step': 12308, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:12.495803', 'step': 12308, 'epoch': 2} {'type': 'loss', 'content': 0.18490812182426453, 'timestamp': '2025-10-01 04:28:12.497772', 'step': 12309, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:12.538049', 'step': 12309, 'epoch': 2} {'type': 'loss', 'content': 0.12397975474596024, 'timestamp': '2025-10-01 04:28:12.540737', 'step': 12310, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:12.579401', 'step': 12310, 'epoch': 2} {'type': 'loss', 'content': 0.10733781009912491, 'timestamp': '2025-10-01 04:28:12.582066', 'step': 12311, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:28:12.623512', 'step': 12311, 'epoch': 2} {'type': 'loss', 'content': 0.12849321961402893, 'timestamp': '2025-10-01 04:28:12.647156', 'step': 12312, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:12.689918', 'step': 12312, 'epoch': 2} {'type': 'loss', 'content': 0.04756259173154831, 'timestamp': '2025-10-01 04:28:12.692052', 'step': 12313, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:12.726348', 'step': 12313, 'epoch': 2} {'type': 'loss', 'content': 0.07668665796518326, 'timestamp': '2025-10-01 04:28:12.728464', 'step': 12314, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:12.774989', 'step': 12314, 'epoch': 2} {'type': 'loss', 'content': 0.09286090731620789, 'timestamp': '2025-10-01 04:28:12.778463', 'step': 12315, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:28:12.811158', 'step': 12315, 'epoch': 2} {'type': 'loss', 'content': 0.12009631097316742, 'timestamp': '2025-10-01 04:28:12.835144', 'step': 12316, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:28:12.881495', 'step': 12316, 'epoch': 2} {'type': 'loss', 'content': 0.04635747894644737, 'timestamp': '2025-10-01 04:28:12.884121', 'step': 12317, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:12.934675', 'step': 12317, 'epoch': 2} {'type': 'loss', 'content': 0.23051977157592773, 'timestamp': '2025-10-01 04:28:12.936786', 'step': 12318, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:12.974975', 'step': 12318, 'epoch': 2} {'type': 'loss', 'content': 0.13326731324195862, 'timestamp': '2025-10-01 04:28:12.982811', 'step': 12319, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:13.022871', 'step': 12319, 'epoch': 2} {'type': 'loss', 'content': 0.08283571898937225, 'timestamp': '2025-10-01 04:28:13.046758', 'step': 12320, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:13.083343', 'step': 12320, 'epoch': 2} {'type': 'loss', 'content': 0.13007190823554993, 'timestamp': '2025-10-01 04:28:13.085431', 'step': 12321, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:13.126192', 'step': 12321, 'epoch': 2} {'type': 'loss', 'content': 0.11122103780508041, 'timestamp': '2025-10-01 04:28:13.128921', 'step': 12322, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:28:13.162149', 'step': 12322, 'epoch': 2} {'type': 'loss', 'content': 0.15557540953159332, 'timestamp': '2025-10-01 04:28:13.164492', 'step': 12323, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:13.197396', 'step': 12323, 'epoch': 2} {'type': 'loss', 'content': 0.06162400543689728, 'timestamp': '2025-10-01 04:28:13.231906', 'step': 12324, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:13.266806', 'step': 12324, 'epoch': 2} {'type': 'loss', 'content': 0.10752097517251968, 'timestamp': '2025-10-01 04:28:13.268914', 'step': 12325, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:28:13.305192', 'step': 12325, 'epoch': 2} {'type': 'loss', 'content': 0.1655481904745102, 'timestamp': '2025-10-01 04:28:13.309446', 'step': 12326, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:13.349325', 'step': 12326, 'epoch': 2} {'type': 'loss', 'content': 0.12153981626033783, 'timestamp': '2025-10-01 04:28:13.351506', 'step': 12327, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:13.383798', 'step': 12327, 'epoch': 2} {'type': 'loss', 'content': 0.12519440054893494, 'timestamp': '2025-10-01 04:28:13.407590', 'step': 12328, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:13.457481', 'step': 12328, 'epoch': 2} {'type': 'loss', 'content': 0.24032069742679596, 'timestamp': '2025-10-01 04:28:13.460323', 'step': 12329, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:13.492691', 'step': 12329, 'epoch': 2} {'type': 'loss', 'content': 0.1389954537153244, 'timestamp': '2025-10-01 04:28:13.495823', 'step': 12330, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:13.540236', 'step': 12330, 'epoch': 2} {'type': 'loss', 'content': 0.07103253901004791, 'timestamp': '2025-10-01 04:28:13.542574', 'step': 12331, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:13.588181', 'step': 12331, 'epoch': 2} {'type': 'loss', 'content': 0.06252550333738327, 'timestamp': '2025-10-01 04:28:13.611892', 'step': 12332, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:13.646431', 'step': 12332, 'epoch': 2} {'type': 'loss', 'content': 0.07614101469516754, 'timestamp': '2025-10-01 04:28:13.648578', 'step': 12333, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:13.694114', 'step': 12333, 'epoch': 2} {'type': 'loss', 'content': 0.086185522377491, 'timestamp': '2025-10-01 04:28:13.696480', 'step': 12334, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:13.729960', 'step': 12334, 'epoch': 2} {'type': 'loss', 'content': 0.1332148313522339, 'timestamp': '2025-10-01 04:28:13.732183', 'step': 12335, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:13.765421', 'step': 12335, 'epoch': 2} {'type': 'loss', 'content': 0.10962001979351044, 'timestamp': '2025-10-01 04:28:13.791080', 'step': 12336, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:13.823562', 'step': 12336, 'epoch': 2} {'type': 'loss', 'content': 0.11018076539039612, 'timestamp': '2025-10-01 04:28:13.825707', 'step': 12337, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:28:13.858709', 'step': 12337, 'epoch': 2} {'type': 'loss', 'content': 0.0799390897154808, 'timestamp': '2025-10-01 04:28:13.861056', 'step': 12338, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:13.893091', 'step': 12338, 'epoch': 2} {'type': 'loss', 'content': 0.17099027335643768, 'timestamp': '2025-10-01 04:28:13.906732', 'step': 12339, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:13.943091', 'step': 12339, 'epoch': 2} {'type': 'loss', 'content': 0.12169836461544037, 'timestamp': '2025-10-01 04:28:13.968665', 'step': 12340, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:14.001381', 'step': 12340, 'epoch': 2} {'type': 'loss', 'content': 0.15001876652240753, 'timestamp': '2025-10-01 04:28:14.003685', 'step': 12341, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:14.042509', 'step': 12341, 'epoch': 2} {'type': 'loss', 'content': 0.11838547140359879, 'timestamp': '2025-10-01 04:28:14.044738', 'step': 12342, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:28:14.076997', 'step': 12342, 'epoch': 2} {'type': 'loss', 'content': 0.08896636962890625, 'timestamp': '2025-10-01 04:28:14.079340', 'step': 12343, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:14.118258', 'step': 12343, 'epoch': 2} {'type': 'loss', 'content': 0.11894123256206512, 'timestamp': '2025-10-01 04:28:14.142293', 'step': 12344, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:14.181078', 'step': 12344, 'epoch': 2} {'type': 'loss', 'content': 0.077705018222332, 'timestamp': '2025-10-01 04:28:14.183446', 'step': 12345, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:14.217553', 'step': 12345, 'epoch': 2} {'type': 'loss', 'content': 0.14182335138320923, 'timestamp': '2025-10-01 04:28:14.219758', 'step': 12346, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:14.253857', 'step': 12346, 'epoch': 2} {'type': 'loss', 'content': 0.10251595079898834, 'timestamp': '2025-10-01 04:28:14.256387', 'step': 12347, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:14.289567', 'step': 12347, 'epoch': 2} {'type': 'loss', 'content': 0.088313028216362, 'timestamp': '2025-10-01 04:28:14.314054', 'step': 12348, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:14.358483', 'step': 12348, 'epoch': 2} {'type': 'loss', 'content': 0.1495482623577118, 'timestamp': '2025-10-01 04:28:14.376506', 'step': 12349, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:14.408878', 'step': 12349, 'epoch': 2} {'type': 'loss', 'content': 0.14699366688728333, 'timestamp': '2025-10-01 04:28:14.412300', 'step': 12350, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:14.447192', 'step': 12350, 'epoch': 2} {'type': 'loss', 'content': 0.15219324827194214, 'timestamp': '2025-10-01 04:28:14.450736', 'step': 12351, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:14.484034', 'step': 12351, 'epoch': 2} {'type': 'loss', 'content': 0.07125367969274521, 'timestamp': '2025-10-01 04:28:14.508040', 'step': 12352, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:14.539058', 'step': 12352, 'epoch': 2} {'type': 'loss', 'content': 0.20949143171310425, 'timestamp': '2025-10-01 04:28:14.541599', 'step': 12353, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:14.575999', 'step': 12353, 'epoch': 2} {'type': 'loss', 'content': 0.16317856311798096, 'timestamp': '2025-10-01 04:28:14.578561', 'step': 12354, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:28:14.610053', 'step': 12354, 'epoch': 2} {'type': 'loss', 'content': 0.1579674482345581, 'timestamp': '2025-10-01 04:28:14.627394', 'step': 12355, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:14.661554', 'step': 12355, 'epoch': 2} {'type': 'loss', 'content': 0.07496418803930283, 'timestamp': '2025-10-01 04:28:14.685331', 'step': 12356, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:28:14.733094', 'step': 12356, 'epoch': 2} {'type': 'loss', 'content': 0.10078386217355728, 'timestamp': '2025-10-01 04:28:14.735767', 'step': 12357, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:14.767656', 'step': 12357, 'epoch': 2} {'type': 'loss', 'content': 0.06746383011341095, 'timestamp': '2025-10-01 04:28:14.771338', 'step': 12358, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:14.804178', 'step': 12358, 'epoch': 2} {'type': 'loss', 'content': 0.15625350177288055, 'timestamp': '2025-10-01 04:28:14.806925', 'step': 12359, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:28:14.850286', 'step': 12359, 'epoch': 2} {'type': 'loss', 'content': 0.13639606535434723, 'timestamp': '2025-10-01 04:28:14.874929', 'step': 12360, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:14.923251', 'step': 12360, 'epoch': 2} {'type': 'loss', 'content': 0.08788788318634033, 'timestamp': '2025-10-01 04:28:14.925495', 'step': 12361, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:28:14.959422', 'step': 12361, 'epoch': 2} {'type': 'loss', 'content': 0.11569869518280029, 'timestamp': '2025-10-01 04:28:14.970629', 'step': 12362, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:15.003032', 'step': 12362, 'epoch': 2} {'type': 'loss', 'content': 0.12149623781442642, 'timestamp': '2025-10-01 04:28:15.007039', 'step': 12363, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:15.047860', 'step': 12363, 'epoch': 2} {'type': 'loss', 'content': 0.07083781063556671, 'timestamp': '2025-10-01 04:28:15.072291', 'step': 12364, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:15.119542', 'step': 12364, 'epoch': 2} {'type': 'loss', 'content': 0.09910368174314499, 'timestamp': '2025-10-01 04:28:15.122546', 'step': 12365, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:15.157421', 'step': 12365, 'epoch': 2} {'type': 'loss', 'content': 0.09266874194145203, 'timestamp': '2025-10-01 04:28:15.162047', 'step': 12366, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:15.194361', 'step': 12366, 'epoch': 2} {'type': 'loss', 'content': 0.12221353501081467, 'timestamp': '2025-10-01 04:28:15.196637', 'step': 12367, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:15.231112', 'step': 12367, 'epoch': 2} {'type': 'loss', 'content': 0.09887798130512238, 'timestamp': '2025-10-01 04:28:15.254887', 'step': 12368, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:15.293627', 'step': 12368, 'epoch': 2} {'type': 'loss', 'content': 0.09085731208324432, 'timestamp': '2025-10-01 04:28:15.295813', 'step': 12369, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:15.327174', 'step': 12369, 'epoch': 2} {'type': 'loss', 'content': 0.09961530566215515, 'timestamp': '2025-10-01 04:28:15.329612', 'step': 12370, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:15.377203', 'step': 12370, 'epoch': 2} {'type': 'loss', 'content': 0.14401234686374664, 'timestamp': '2025-10-01 04:28:15.380779', 'step': 12371, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:15.420260', 'step': 12371, 'epoch': 2} {'type': 'loss', 'content': 0.11668241769075394, 'timestamp': '2025-10-01 04:28:15.444062', 'step': 12372, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:15.484585', 'step': 12372, 'epoch': 2} {'type': 'loss', 'content': 0.07845493406057358, 'timestamp': '2025-10-01 04:28:15.487202', 'step': 12373, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:15.533471', 'step': 12373, 'epoch': 2} {'type': 'loss', 'content': 0.18567772209644318, 'timestamp': '2025-10-01 04:28:15.536537', 'step': 12374, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:15.577191', 'step': 12374, 'epoch': 2} {'type': 'loss', 'content': 0.14054431021213531, 'timestamp': '2025-10-01 04:28:15.582653', 'step': 12375, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:15.638125', 'step': 12375, 'epoch': 2} {'type': 'loss', 'content': 0.09909769892692566, 'timestamp': '2025-10-01 04:28:15.662247', 'step': 12376, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:15.693110', 'step': 12376, 'epoch': 2} {'type': 'loss', 'content': 0.13984587788581848, 'timestamp': '2025-10-01 04:28:15.695447', 'step': 12377, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:15.732388', 'step': 12377, 'epoch': 2} {'type': 'loss', 'content': 0.09282668679952621, 'timestamp': '2025-10-01 04:28:15.739797', 'step': 12378, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:15.779805', 'step': 12378, 'epoch': 2} {'type': 'loss', 'content': 0.09139391779899597, 'timestamp': '2025-10-01 04:28:15.782073', 'step': 12379, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:15.824576', 'step': 12379, 'epoch': 2} {'type': 'loss', 'content': 0.09517543017864227, 'timestamp': '2025-10-01 04:28:15.848407', 'step': 12380, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:15.881795', 'step': 12380, 'epoch': 2} {'type': 'loss', 'content': 0.0783454030752182, 'timestamp': '2025-10-01 04:28:15.884582', 'step': 12381, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:15.924454', 'step': 12381, 'epoch': 2} {'type': 'loss', 'content': 0.10051064193248749, 'timestamp': '2025-10-01 04:28:15.926964', 'step': 12382, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:15.961525', 'step': 12382, 'epoch': 2} {'type': 'loss', 'content': 0.14806555211544037, 'timestamp': '2025-10-01 04:28:15.964379', 'step': 12383, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:15.996281', 'step': 12383, 'epoch': 2} {'type': 'loss', 'content': 0.08582369238138199, 'timestamp': '2025-10-01 04:28:16.020050', 'step': 12384, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:16.051469', 'step': 12384, 'epoch': 2} {'type': 'loss', 'content': 0.12869799137115479, 'timestamp': '2025-10-01 04:28:16.053582', 'step': 12385, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:16.084470', 'step': 12385, 'epoch': 2} {'type': 'loss', 'content': 0.11147897690534592, 'timestamp': '2025-10-01 04:28:16.088882', 'step': 12386, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:28:16.122959', 'step': 12386, 'epoch': 2} {'type': 'loss', 'content': 0.21548792719841003, 'timestamp': '2025-10-01 04:28:16.125608', 'step': 12387, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:16.158007', 'step': 12387, 'epoch': 2} {'type': 'loss', 'content': 0.11301390081644058, 'timestamp': '2025-10-01 04:28:16.181953', 'step': 12388, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:16.221061', 'step': 12388, 'epoch': 2} {'type': 'loss', 'content': 0.13666114211082458, 'timestamp': '2025-10-01 04:28:16.223683', 'step': 12389, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:28:16.259755', 'step': 12389, 'epoch': 2} {'type': 'loss', 'content': 0.12167590111494064, 'timestamp': '2025-10-01 04:28:16.262190', 'step': 12390, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:16.298120', 'step': 12390, 'epoch': 2} {'type': 'loss', 'content': 0.07939605414867401, 'timestamp': '2025-10-01 04:28:16.300339', 'step': 12391, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:16.334528', 'step': 12391, 'epoch': 2} {'type': 'loss', 'content': 0.1269153654575348, 'timestamp': '2025-10-01 04:28:16.372391', 'step': 12392, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:16.405345', 'step': 12392, 'epoch': 2} {'type': 'loss', 'content': 0.036964625120162964, 'timestamp': '2025-10-01 04:28:16.407519', 'step': 12393, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:16.441528', 'step': 12393, 'epoch': 2} {'type': 'loss', 'content': 0.16530457139015198, 'timestamp': '2025-10-01 04:28:16.443957', 'step': 12394, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:16.478236', 'step': 12394, 'epoch': 2} {'type': 'loss', 'content': 0.0790313184261322, 'timestamp': '2025-10-01 04:28:16.480455', 'step': 12395, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:16.515005', 'step': 12395, 'epoch': 2} {'type': 'loss', 'content': 0.11797799915075302, 'timestamp': '2025-10-01 04:28:16.539365', 'step': 12396, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:16.571259', 'step': 12396, 'epoch': 2} {'type': 'loss', 'content': 0.1299765557050705, 'timestamp': '2025-10-01 04:28:16.573651', 'step': 12397, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:16.604567', 'step': 12397, 'epoch': 2} {'type': 'loss', 'content': 0.12149831652641296, 'timestamp': '2025-10-01 04:28:16.606769', 'step': 12398, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:16.638161', 'step': 12398, 'epoch': 2} {'type': 'loss', 'content': 0.10926169902086258, 'timestamp': '2025-10-01 04:28:16.640354', 'step': 12399, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:16.672511', 'step': 12399, 'epoch': 2} {'type': 'loss', 'content': 0.0649840235710144, 'timestamp': '2025-10-01 04:28:16.696658', 'step': 12400, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:16.731954', 'step': 12400, 'epoch': 2} {'type': 'loss', 'content': 0.0906895101070404, 'timestamp': '2025-10-01 04:28:16.734228', 'step': 12401, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:16.781991', 'step': 12401, 'epoch': 2} {'type': 'loss', 'content': 0.16909191012382507, 'timestamp': '2025-10-01 04:28:16.784240', 'step': 12402, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:16.819816', 'step': 12402, 'epoch': 2} {'type': 'loss', 'content': 0.051350973546504974, 'timestamp': '2025-10-01 04:28:16.837240', 'step': 12403, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:16.885016', 'step': 12403, 'epoch': 2} {'type': 'loss', 'content': 0.25330525636672974, 'timestamp': '2025-10-01 04:28:16.909223', 'step': 12404, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:16.949057', 'step': 12404, 'epoch': 2} {'type': 'loss', 'content': 0.12104642391204834, 'timestamp': '2025-10-01 04:28:16.951199', 'step': 12405, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:28:16.985874', 'step': 12405, 'epoch': 2} {'type': 'loss', 'content': 0.2256336659193039, 'timestamp': '2025-10-01 04:28:16.989223', 'step': 12406, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:28:17.021925', 'step': 12406, 'epoch': 2} {'type': 'loss', 'content': 0.1590375304222107, 'timestamp': '2025-10-01 04:28:17.024849', 'step': 12407, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:17.065688', 'step': 12407, 'epoch': 2} {'type': 'loss', 'content': 0.10020466148853302, 'timestamp': '2025-10-01 04:28:17.102988', 'step': 12408, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:17.135558', 'step': 12408, 'epoch': 2} {'type': 'loss', 'content': 0.10613324493169785, 'timestamp': '2025-10-01 04:28:17.137726', 'step': 12409, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:17.177825', 'step': 12409, 'epoch': 2} {'type': 'loss', 'content': 0.0493556410074234, 'timestamp': '2025-10-01 04:28:17.180008', 'step': 12410, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:17.216223', 'step': 12410, 'epoch': 2} {'type': 'loss', 'content': 0.06707573682069778, 'timestamp': '2025-10-01 04:28:17.218402', 'step': 12411, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:28:17.257524', 'step': 12411, 'epoch': 2} {'type': 'loss', 'content': 0.11762402206659317, 'timestamp': '2025-10-01 04:28:17.281301', 'step': 12412, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:17.315101', 'step': 12412, 'epoch': 2} {'type': 'loss', 'content': 0.13419143855571747, 'timestamp': '2025-10-01 04:28:17.317428', 'step': 12413, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:17.357207', 'step': 12413, 'epoch': 2} {'type': 'loss', 'content': 0.09877575188875198, 'timestamp': '2025-10-01 04:28:17.359395', 'step': 12414, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:28:17.392303', 'step': 12414, 'epoch': 2} {'type': 'loss', 'content': 0.10445734858512878, 'timestamp': '2025-10-01 04:28:17.394931', 'step': 12415, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:17.434359', 'step': 12415, 'epoch': 2} {'type': 'loss', 'content': 0.13785703480243683, 'timestamp': '2025-10-01 04:28:17.459658', 'step': 12416, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:28:17.493131', 'step': 12416, 'epoch': 2} {'type': 'loss', 'content': 0.10894080996513367, 'timestamp': '2025-10-01 04:28:17.496153', 'step': 12417, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:17.528793', 'step': 12417, 'epoch': 2} {'type': 'loss', 'content': 0.08840613067150116, 'timestamp': '2025-10-01 04:28:17.536188', 'step': 12418, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:17.567503', 'step': 12418, 'epoch': 2} {'type': 'loss', 'content': 0.11786568909883499, 'timestamp': '2025-10-01 04:28:17.569894', 'step': 12419, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:17.601520', 'step': 12419, 'epoch': 2} {'type': 'loss', 'content': 0.12742236256599426, 'timestamp': '2025-10-01 04:28:17.625175', 'step': 12420, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:17.656352', 'step': 12420, 'epoch': 2} {'type': 'loss', 'content': 0.08281075954437256, 'timestamp': '2025-10-01 04:28:17.660130', 'step': 12421, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:17.695534', 'step': 12421, 'epoch': 2} {'type': 'loss', 'content': 0.14431676268577576, 'timestamp': '2025-10-01 04:28:17.697615', 'step': 12422, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:17.732931', 'step': 12422, 'epoch': 2} {'type': 'loss', 'content': 0.07272099703550339, 'timestamp': '2025-10-01 04:28:17.735266', 'step': 12423, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:17.778381', 'step': 12423, 'epoch': 2} {'type': 'loss', 'content': 0.096685029566288, 'timestamp': '2025-10-01 04:28:17.809172', 'step': 12424, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:17.844793', 'step': 12424, 'epoch': 2} {'type': 'loss', 'content': 0.094594806432724, 'timestamp': '2025-10-01 04:28:17.848099', 'step': 12425, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:17.883744', 'step': 12425, 'epoch': 2} {'type': 'loss', 'content': 0.11137371510267258, 'timestamp': '2025-10-01 04:28:17.885864', 'step': 12426, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:17.922129', 'step': 12426, 'epoch': 2} {'type': 'loss', 'content': 0.1262243688106537, 'timestamp': '2025-10-01 04:28:17.924246', 'step': 12427, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:17.955334', 'step': 12427, 'epoch': 2} {'type': 'loss', 'content': 0.037577610462903976, 'timestamp': '2025-10-01 04:28:17.985976', 'step': 12428, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:28:18.021548', 'step': 12428, 'epoch': 2} {'type': 'loss', 'content': 0.20914702117443085, 'timestamp': '2025-10-01 04:28:18.023807', 'step': 12429, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:18.063419', 'step': 12429, 'epoch': 2} {'type': 'loss', 'content': 0.10562194138765335, 'timestamp': '2025-10-01 04:28:18.065539', 'step': 12430, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:18.098303', 'step': 12430, 'epoch': 2} {'type': 'loss', 'content': 0.0841626524925232, 'timestamp': '2025-10-01 04:28:18.101883', 'step': 12431, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:18.139558', 'step': 12431, 'epoch': 2} {'type': 'loss', 'content': 0.033695369958877563, 'timestamp': '2025-10-01 04:28:18.164584', 'step': 12432, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:28:18.210925', 'step': 12432, 'epoch': 2} {'type': 'loss', 'content': 0.09358754754066467, 'timestamp': '2025-10-01 04:28:18.221373', 'step': 12433, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:18.256212', 'step': 12433, 'epoch': 2} {'type': 'loss', 'content': 0.1389944702386856, 'timestamp': '2025-10-01 04:28:18.258309', 'step': 12434, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:18.291700', 'step': 12434, 'epoch': 2} {'type': 'loss', 'content': 0.054226428270339966, 'timestamp': '2025-10-01 04:28:18.294852', 'step': 12435, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:28:18.327190', 'step': 12435, 'epoch': 2} {'type': 'loss', 'content': 0.0974981039762497, 'timestamp': '2025-10-01 04:28:18.351248', 'step': 12436, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:18.382973', 'step': 12436, 'epoch': 2} {'type': 'loss', 'content': 0.09543007612228394, 'timestamp': '2025-10-01 04:28:18.385183', 'step': 12437, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:28:18.425281', 'step': 12437, 'epoch': 2} {'type': 'loss', 'content': 0.06531290709972382, 'timestamp': '2025-10-01 04:28:18.428335', 'step': 12438, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:28:18.468468', 'step': 12438, 'epoch': 2} {'type': 'loss', 'content': 0.25989583134651184, 'timestamp': '2025-10-01 04:28:18.471000', 'step': 12439, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:18.501860', 'step': 12439, 'epoch': 2} {'type': 'loss', 'content': 0.09955159574747086, 'timestamp': '2025-10-01 04:28:18.525513', 'step': 12440, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:18.557344', 'step': 12440, 'epoch': 2} {'type': 'loss', 'content': 0.10822075605392456, 'timestamp': '2025-10-01 04:28:18.559651', 'step': 12441, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:28:18.590420', 'step': 12441, 'epoch': 2} {'type': 'loss', 'content': 0.06992121040821075, 'timestamp': '2025-10-01 04:28:18.593320', 'step': 12442, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:18.623818', 'step': 12442, 'epoch': 2} {'type': 'loss', 'content': 0.05155787616968155, 'timestamp': '2025-10-01 04:28:18.626616', 'step': 12443, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:18.664794', 'step': 12443, 'epoch': 2} {'type': 'loss', 'content': 0.15205168724060059, 'timestamp': '2025-10-01 04:28:18.688467', 'step': 12444, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:18.719319', 'step': 12444, 'epoch': 2} {'type': 'loss', 'content': 0.1286720335483551, 'timestamp': '2025-10-01 04:28:18.721575', 'step': 12445, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:28:18.753896', 'step': 12445, 'epoch': 2} {'type': 'loss', 'content': 0.10675903409719467, 'timestamp': '2025-10-01 04:28:18.756422', 'step': 12446, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:28:18.792477', 'step': 12446, 'epoch': 2} {'type': 'loss', 'content': 0.12110244482755661, 'timestamp': '2025-10-01 04:28:18.794649', 'step': 12447, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:18.826482', 'step': 12447, 'epoch': 2} {'type': 'loss', 'content': 0.10073066502809525, 'timestamp': '2025-10-01 04:28:18.850128', 'step': 12448, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:18.884805', 'step': 12448, 'epoch': 2} {'type': 'loss', 'content': 0.0671640932559967, 'timestamp': '2025-10-01 04:28:18.887097', 'step': 12449, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:18.919299', 'step': 12449, 'epoch': 2} {'type': 'loss', 'content': 0.06193374842405319, 'timestamp': '2025-10-01 04:28:18.921430', 'step': 12450, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:18.953564', 'step': 12450, 'epoch': 2} {'type': 'loss', 'content': 0.17264990508556366, 'timestamp': '2025-10-01 04:28:18.968508', 'step': 12451, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:28:19.003003', 'step': 12451, 'epoch': 2} {'type': 'loss', 'content': 0.15176162123680115, 'timestamp': '2025-10-01 04:28:19.026702', 'step': 12452, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:19.058099', 'step': 12452, 'epoch': 2} {'type': 'loss', 'content': 0.07495465129613876, 'timestamp': '2025-10-01 04:28:19.060252', 'step': 12453, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:19.091912', 'step': 12453, 'epoch': 2} {'type': 'loss', 'content': 0.22035937011241913, 'timestamp': '2025-10-01 04:28:19.093983', 'step': 12454, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:19.126128', 'step': 12454, 'epoch': 2} {'type': 'loss', 'content': 0.12710486352443695, 'timestamp': '2025-10-01 04:28:19.128349', 'step': 12455, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:19.164314', 'step': 12455, 'epoch': 2} {'type': 'loss', 'content': 0.12036682665348053, 'timestamp': '2025-10-01 04:28:19.188402', 'step': 12456, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:19.233138', 'step': 12456, 'epoch': 2} {'type': 'loss', 'content': 0.0859459936618805, 'timestamp': '2025-10-01 04:28:19.239757', 'step': 12457, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:28:19.271699', 'step': 12457, 'epoch': 2} {'type': 'loss', 'content': 0.10631252080202103, 'timestamp': '2025-10-01 04:28:19.274516', 'step': 12458, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:19.305468', 'step': 12458, 'epoch': 2} {'type': 'loss', 'content': 0.17892561852931976, 'timestamp': '2025-10-01 04:28:19.310281', 'step': 12459, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-10-01 04:28:19.351432', 'step': 12459, 'epoch': 2} {'type': 'loss', 'content': 0.07282346487045288, 'timestamp': '2025-10-01 04:28:19.376990', 'step': 12460, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:19.408842', 'step': 12460, 'epoch': 2} {'type': 'loss', 'content': 0.1119946539402008, 'timestamp': '2025-10-01 04:28:19.411208', 'step': 12461, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:19.444814', 'step': 12461, 'epoch': 2} {'type': 'loss', 'content': 0.07981090992689133, 'timestamp': '2025-10-01 04:28:19.459016', 'step': 12462, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:19.493518', 'step': 12462, 'epoch': 2} {'type': 'loss', 'content': 0.1467783898115158, 'timestamp': '2025-10-01 04:28:19.495778', 'step': 12463, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:19.526603', 'step': 12463, 'epoch': 2} {'type': 'loss', 'content': 0.08820392191410065, 'timestamp': '2025-10-01 04:28:19.550339', 'step': 12464, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:19.582211', 'step': 12464, 'epoch': 2} {'type': 'loss', 'content': 0.058958377689123154, 'timestamp': '2025-10-01 04:28:19.584530', 'step': 12465, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:19.617219', 'step': 12465, 'epoch': 2} {'type': 'loss', 'content': 0.09824523329734802, 'timestamp': '2025-10-01 04:28:19.619330', 'step': 12466, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:19.650615', 'step': 12466, 'epoch': 2} {'type': 'loss', 'content': 0.0751418024301529, 'timestamp': '2025-10-01 04:28:19.652857', 'step': 12467, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:19.684931', 'step': 12467, 'epoch': 2} {'type': 'loss', 'content': 0.10010015219449997, 'timestamp': '2025-10-01 04:28:19.708709', 'step': 12468, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:28:19.741225', 'step': 12468, 'epoch': 2} {'type': 'loss', 'content': 0.10300971567630768, 'timestamp': '2025-10-01 04:28:19.743392', 'step': 12469, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:19.774094', 'step': 12469, 'epoch': 2} {'type': 'loss', 'content': 0.11508473008871078, 'timestamp': '2025-10-01 04:28:19.776279', 'step': 12470, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:19.810727', 'step': 12470, 'epoch': 2} {'type': 'loss', 'content': 0.14625754952430725, 'timestamp': '2025-10-01 04:28:19.812935', 'step': 12471, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:19.846885', 'step': 12471, 'epoch': 2} {'type': 'loss', 'content': 0.07080712169408798, 'timestamp': '2025-10-01 04:28:19.870546', 'step': 12472, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:19.904293', 'step': 12472, 'epoch': 2} {'type': 'loss', 'content': 0.1445680856704712, 'timestamp': '2025-10-01 04:28:19.906383', 'step': 12473, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:19.938052', 'step': 12473, 'epoch': 2} {'type': 'loss', 'content': 0.10556396842002869, 'timestamp': '2025-10-01 04:28:19.940381', 'step': 12474, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:19.973080', 'step': 12474, 'epoch': 2} {'type': 'loss', 'content': 0.06494604051113129, 'timestamp': '2025-10-01 04:28:19.975419', 'step': 12475, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:20.007076', 'step': 12475, 'epoch': 2} {'type': 'loss', 'content': 0.08991651982069016, 'timestamp': '2025-10-01 04:28:20.030682', 'step': 12476, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:20.061465', 'step': 12476, 'epoch': 2} {'type': 'loss', 'content': 0.16569367051124573, 'timestamp': '2025-10-01 04:28:20.063585', 'step': 12477, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:20.104664', 'step': 12477, 'epoch': 2} {'type': 'loss', 'content': 0.11848478764295578, 'timestamp': '2025-10-01 04:28:20.106846', 'step': 12478, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:20.137694', 'step': 12478, 'epoch': 2} {'type': 'loss', 'content': 0.11740390956401825, 'timestamp': '2025-10-01 04:28:20.139886', 'step': 12479, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:20.171362', 'step': 12479, 'epoch': 2} {'type': 'loss', 'content': 0.11935451626777649, 'timestamp': '2025-10-01 04:28:20.195094', 'step': 12480, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:20.226067', 'step': 12480, 'epoch': 2} {'type': 'loss', 'content': 0.03235696628689766, 'timestamp': '2025-10-01 04:28:20.228247', 'step': 12481, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:20.259100', 'step': 12481, 'epoch': 2} {'type': 'loss', 'content': 0.10942735522985458, 'timestamp': '2025-10-01 04:28:20.261252', 'step': 12482, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:20.292182', 'step': 12482, 'epoch': 2} {'type': 'loss', 'content': 0.07016538828611374, 'timestamp': '2025-10-01 04:28:20.294501', 'step': 12483, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:20.326865', 'step': 12483, 'epoch': 2} {'type': 'loss', 'content': 0.12180323898792267, 'timestamp': '2025-10-01 04:28:20.350499', 'step': 12484, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:28:20.382452', 'step': 12484, 'epoch': 2} {'type': 'loss', 'content': 0.13229697942733765, 'timestamp': '2025-10-01 04:28:20.385169', 'step': 12485, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:28:20.416055', 'step': 12485, 'epoch': 2} {'type': 'loss', 'content': 0.10428515821695328, 'timestamp': '2025-10-01 04:28:20.418923', 'step': 12486, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:20.460167', 'step': 12486, 'epoch': 2} {'type': 'loss', 'content': 0.147750586271286, 'timestamp': '2025-10-01 04:28:20.469884', 'step': 12487, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:20.501634', 'step': 12487, 'epoch': 2} {'type': 'loss', 'content': 0.10654225945472717, 'timestamp': '2025-10-01 04:28:20.539428', 'step': 12488, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:20.570693', 'step': 12488, 'epoch': 2} {'type': 'loss', 'content': 0.12308122962713242, 'timestamp': '2025-10-01 04:28:20.579869', 'step': 12489, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:20.611421', 'step': 12489, 'epoch': 2} {'type': 'loss', 'content': 0.09234439581632614, 'timestamp': '2025-10-01 04:28:20.613969', 'step': 12490, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:20.645210', 'step': 12490, 'epoch': 2} {'type': 'loss', 'content': 0.09253543615341187, 'timestamp': '2025-10-01 04:28:20.647171', 'step': 12491, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:20.678300', 'step': 12491, 'epoch': 2} {'type': 'loss', 'content': 0.10273568332195282, 'timestamp': '2025-10-01 04:28:20.702616', 'step': 12492, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:20.733395', 'step': 12492, 'epoch': 2} {'type': 'loss', 'content': 0.14064404368400574, 'timestamp': '2025-10-01 04:28:20.735551', 'step': 12493, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:20.767268', 'step': 12493, 'epoch': 2} {'type': 'loss', 'content': 0.09921609610319138, 'timestamp': '2025-10-01 04:28:20.769526', 'step': 12494, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:20.804897', 'step': 12494, 'epoch': 2} {'type': 'loss', 'content': 0.08362990617752075, 'timestamp': '2025-10-01 04:28:20.807505', 'step': 12495, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:20.841411', 'step': 12495, 'epoch': 2} {'type': 'loss', 'content': 0.1992487907409668, 'timestamp': '2025-10-01 04:28:20.877695', 'step': 12496, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:28:20.910446', 'step': 12496, 'epoch': 2} {'type': 'loss', 'content': 0.12643280625343323, 'timestamp': '2025-10-01 04:28:20.913085', 'step': 12497, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:20.946228', 'step': 12497, 'epoch': 2} {'type': 'loss', 'content': 0.11797784268856049, 'timestamp': '2025-10-01 04:28:20.948665', 'step': 12498, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:20.979523', 'step': 12498, 'epoch': 2} {'type': 'loss', 'content': 0.18326310813426971, 'timestamp': '2025-10-01 04:28:20.982149', 'step': 12499, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:21.013469', 'step': 12499, 'epoch': 2} {'type': 'loss', 'content': 0.15469758212566376, 'timestamp': '2025-10-01 04:28:21.037212', 'step': 12500, 'epoch': 2} {'type': 'info', 'content': 'Checkpoint saved at step 12500', 'timestamp': '2025-10-01 04:28:26.616181', 'step': 12500, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:26.647962', 'step': 12500, 'epoch': 2} {'type': 'loss', 'content': 0.09663045406341553, 'timestamp': '2025-10-01 04:28:26.665476', 'step': 12501, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:26.700196', 'step': 12501, 'epoch': 2} {'type': 'loss', 'content': 0.10799196362495422, 'timestamp': '2025-10-01 04:28:26.703130', 'step': 12502, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:26.735938', 'step': 12502, 'epoch': 2} {'type': 'loss', 'content': 0.12371467798948288, 'timestamp': '2025-10-01 04:28:26.738227', 'step': 12503, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:26.785119', 'step': 12503, 'epoch': 2} {'type': 'loss', 'content': 0.13353578746318817, 'timestamp': '2025-10-01 04:28:26.808895', 'step': 12504, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:26.839559', 'step': 12504, 'epoch': 2} {'type': 'loss', 'content': 0.10128182917833328, 'timestamp': '2025-10-01 04:28:26.841877', 'step': 12505, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:28:26.878313', 'step': 12505, 'epoch': 2} {'type': 'loss', 'content': 0.08635518699884415, 'timestamp': '2025-10-01 04:28:26.881008', 'step': 12506, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:26.917624', 'step': 12506, 'epoch': 2} {'type': 'loss', 'content': 0.17319265007972717, 'timestamp': '2025-10-01 04:28:26.919648', 'step': 12507, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:26.952961', 'step': 12507, 'epoch': 2} {'type': 'loss', 'content': 0.0714382454752922, 'timestamp': '2025-10-01 04:28:26.976652', 'step': 12508, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:28:27.016956', 'step': 12508, 'epoch': 2} {'type': 'loss', 'content': 0.111944779753685, 'timestamp': '2025-10-01 04:28:27.019233', 'step': 12509, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:27.054091', 'step': 12509, 'epoch': 2} {'type': 'loss', 'content': 0.1063331812620163, 'timestamp': '2025-10-01 04:28:27.056041', 'step': 12510, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:27.088707', 'step': 12510, 'epoch': 2} {'type': 'loss', 'content': 0.11265397816896439, 'timestamp': '2025-10-01 04:28:27.090855', 'step': 12511, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:27.122826', 'step': 12511, 'epoch': 2} {'type': 'loss', 'content': 0.13043169677257538, 'timestamp': '2025-10-01 04:28:27.149185', 'step': 12512, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:27.188108', 'step': 12512, 'epoch': 2} {'type': 'loss', 'content': 0.09379158169031143, 'timestamp': '2025-10-01 04:28:27.190382', 'step': 12513, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:27.222422', 'step': 12513, 'epoch': 2} {'type': 'loss', 'content': 0.08704524487257004, 'timestamp': '2025-10-01 04:28:27.224397', 'step': 12514, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:28:27.256915', 'step': 12514, 'epoch': 2} {'type': 'loss', 'content': 0.12014082074165344, 'timestamp': '2025-10-01 04:28:27.259910', 'step': 12515, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:27.291802', 'step': 12515, 'epoch': 2} {'type': 'loss', 'content': 0.04598725214600563, 'timestamp': '2025-10-01 04:28:27.315336', 'step': 12516, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:28:27.354104', 'step': 12516, 'epoch': 2} {'type': 'loss', 'content': 0.19952599704265594, 'timestamp': '2025-10-01 04:28:27.356138', 'step': 12517, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:27.390199', 'step': 12517, 'epoch': 2} {'type': 'loss', 'content': 0.0643792599439621, 'timestamp': '2025-10-01 04:28:27.399498', 'step': 12518, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:28:27.431819', 'step': 12518, 'epoch': 2} {'type': 'loss', 'content': 0.10318055748939514, 'timestamp': '2025-10-01 04:28:27.447812', 'step': 12519, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:27.485588', 'step': 12519, 'epoch': 2} {'type': 'loss', 'content': 0.07110805809497833, 'timestamp': '2025-10-01 04:28:27.509499', 'step': 12520, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:27.541041', 'step': 12520, 'epoch': 2} {'type': 'loss', 'content': 0.07206691056489944, 'timestamp': '2025-10-01 04:28:27.551093', 'step': 12521, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:27.584927', 'step': 12521, 'epoch': 2} {'type': 'loss', 'content': 0.10513149201869965, 'timestamp': '2025-10-01 04:28:27.587097', 'step': 12522, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:27.619512', 'step': 12522, 'epoch': 2} {'type': 'loss', 'content': 0.058346446603536606, 'timestamp': '2025-10-01 04:28:27.621716', 'step': 12523, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:27.652423', 'step': 12523, 'epoch': 2} {'type': 'loss', 'content': 0.16040946543216705, 'timestamp': '2025-10-01 04:28:27.687180', 'step': 12524, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:27.720799', 'step': 12524, 'epoch': 2} {'type': 'loss', 'content': 0.11185209453105927, 'timestamp': '2025-10-01 04:28:27.723638', 'step': 12525, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:27.757475', 'step': 12525, 'epoch': 2} {'type': 'loss', 'content': 0.18553690612316132, 'timestamp': '2025-10-01 04:28:27.760156', 'step': 12526, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:27.793732', 'step': 12526, 'epoch': 2} {'type': 'loss', 'content': 0.11914080381393433, 'timestamp': '2025-10-01 04:28:27.796087', 'step': 12527, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:27.827124', 'step': 12527, 'epoch': 2} {'type': 'loss', 'content': 0.06165057048201561, 'timestamp': '2025-10-01 04:28:27.850634', 'step': 12528, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:28:27.883582', 'step': 12528, 'epoch': 2} {'type': 'loss', 'content': 0.12832273542881012, 'timestamp': '2025-10-01 04:28:27.885483', 'step': 12529, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:27.920023', 'step': 12529, 'epoch': 2} {'type': 'loss', 'content': 0.1487160474061966, 'timestamp': '2025-10-01 04:28:27.921967', 'step': 12530, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:27.957362', 'step': 12530, 'epoch': 2} {'type': 'loss', 'content': 0.11977465450763702, 'timestamp': '2025-10-01 04:28:27.959723', 'step': 12531, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:28.005395', 'step': 12531, 'epoch': 2} {'type': 'loss', 'content': 0.01571350172162056, 'timestamp': '2025-10-01 04:28:28.030764', 'step': 12532, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:28.067434', 'step': 12532, 'epoch': 2} {'type': 'loss', 'content': 0.07209143787622452, 'timestamp': '2025-10-01 04:28:28.083079', 'step': 12533, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:28.117613', 'step': 12533, 'epoch': 2} {'type': 'loss', 'content': 0.14297328889369965, 'timestamp': '2025-10-01 04:28:28.119581', 'step': 12534, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:28.162864', 'step': 12534, 'epoch': 2} {'type': 'loss', 'content': 0.0730820968747139, 'timestamp': '2025-10-01 04:28:28.165469', 'step': 12535, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:28.204035', 'step': 12535, 'epoch': 2} {'type': 'loss', 'content': 0.12412475794553757, 'timestamp': '2025-10-01 04:28:28.227558', 'step': 12536, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:28.259134', 'step': 12536, 'epoch': 2} {'type': 'loss', 'content': 0.06748636066913605, 'timestamp': '2025-10-01 04:28:28.261214', 'step': 12537, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:28.292513', 'step': 12537, 'epoch': 2} {'type': 'loss', 'content': 0.11652872711420059, 'timestamp': '2025-10-01 04:28:28.294765', 'step': 12538, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:28:28.325197', 'step': 12538, 'epoch': 2} {'type': 'loss', 'content': 0.12190898507833481, 'timestamp': '2025-10-01 04:28:28.327484', 'step': 12539, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:28.358360', 'step': 12539, 'epoch': 2} {'type': 'loss', 'content': 0.05565366894006729, 'timestamp': '2025-10-01 04:28:28.382340', 'step': 12540, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:28.412504', 'step': 12540, 'epoch': 2} {'type': 'loss', 'content': 0.12899525463581085, 'timestamp': '2025-10-01 04:28:28.414592', 'step': 12541, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:28.450351', 'step': 12541, 'epoch': 2} {'type': 'loss', 'content': 0.2312423139810562, 'timestamp': '2025-10-01 04:28:28.452611', 'step': 12542, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:28:28.483124', 'step': 12542, 'epoch': 2} {'type': 'loss', 'content': 0.10278823971748352, 'timestamp': '2025-10-01 04:28:28.485611', 'step': 12543, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:28.516531', 'step': 12543, 'epoch': 2} {'type': 'loss', 'content': 0.05210955813527107, 'timestamp': '2025-10-01 04:28:28.539898', 'step': 12544, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:28.575264', 'step': 12544, 'epoch': 2} {'type': 'loss', 'content': 0.08747575432062149, 'timestamp': '2025-10-01 04:28:28.577296', 'step': 12545, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:28.608431', 'step': 12545, 'epoch': 2} {'type': 'loss', 'content': 0.06948649883270264, 'timestamp': '2025-10-01 04:28:28.610939', 'step': 12546, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:28.647062', 'step': 12546, 'epoch': 2} {'type': 'loss', 'content': 0.19208437204360962, 'timestamp': '2025-10-01 04:28:28.649188', 'step': 12547, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:28.679693', 'step': 12547, 'epoch': 2} {'type': 'loss', 'content': 0.09101047366857529, 'timestamp': '2025-10-01 04:28:28.703228', 'step': 12548, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:28.734003', 'step': 12548, 'epoch': 2} {'type': 'loss', 'content': 0.07116134464740753, 'timestamp': '2025-10-01 04:28:28.735991', 'step': 12549, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:28.767751', 'step': 12549, 'epoch': 2} {'type': 'loss', 'content': 0.1426774114370346, 'timestamp': '2025-10-01 04:28:28.777152', 'step': 12550, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:28.808551', 'step': 12550, 'epoch': 2} {'type': 'loss', 'content': 0.1691538542509079, 'timestamp': '2025-10-01 04:28:28.810493', 'step': 12551, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:28.841280', 'step': 12551, 'epoch': 2} {'type': 'loss', 'content': 0.18652160465717316, 'timestamp': '2025-10-01 04:28:28.864806', 'step': 12552, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:28.895905', 'step': 12552, 'epoch': 2} {'type': 'loss', 'content': 0.04474150389432907, 'timestamp': '2025-10-01 04:28:28.897969', 'step': 12553, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:28.930730', 'step': 12553, 'epoch': 2} {'type': 'loss', 'content': 0.09706997126340866, 'timestamp': '2025-10-01 04:28:28.934281', 'step': 12554, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:28.967876', 'step': 12554, 'epoch': 2} {'type': 'loss', 'content': 0.16841168701648712, 'timestamp': '2025-10-01 04:28:28.970114', 'step': 12555, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:29.001922', 'step': 12555, 'epoch': 2} {'type': 'loss', 'content': 0.19212067127227783, 'timestamp': '2025-10-01 04:28:29.026595', 'step': 12556, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:29.057496', 'step': 12556, 'epoch': 2} {'type': 'loss', 'content': 0.09768380224704742, 'timestamp': '2025-10-01 04:28:29.060233', 'step': 12557, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:29.102156', 'step': 12557, 'epoch': 2} {'type': 'loss', 'content': 0.07759810984134674, 'timestamp': '2025-10-01 04:28:29.110431', 'step': 12558, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:29.147715', 'step': 12558, 'epoch': 2} {'type': 'loss', 'content': 0.15467777848243713, 'timestamp': '2025-10-01 04:28:29.149671', 'step': 12559, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:28:29.180859', 'step': 12559, 'epoch': 2} {'type': 'loss', 'content': 0.1154913455247879, 'timestamp': '2025-10-01 04:28:29.204442', 'step': 12560, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:28:29.239480', 'step': 12560, 'epoch': 2} {'type': 'loss', 'content': 0.06267079710960388, 'timestamp': '2025-10-01 04:28:29.242544', 'step': 12561, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:29.273279', 'step': 12561, 'epoch': 2} {'type': 'loss', 'content': 0.13713619112968445, 'timestamp': '2025-10-01 04:28:29.275395', 'step': 12562, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:29.307404', 'step': 12562, 'epoch': 2} {'type': 'loss', 'content': 0.18557344377040863, 'timestamp': '2025-10-01 04:28:29.309486', 'step': 12563, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:29.340159', 'step': 12563, 'epoch': 2} {'type': 'loss', 'content': 0.18894807994365692, 'timestamp': '2025-10-01 04:28:29.364110', 'step': 12564, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:29.395287', 'step': 12564, 'epoch': 2} {'type': 'loss', 'content': 0.12216752022504807, 'timestamp': '2025-10-01 04:28:29.398942', 'step': 12565, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:29.429160', 'step': 12565, 'epoch': 2} {'type': 'loss', 'content': 0.06731143593788147, 'timestamp': '2025-10-01 04:28:29.433009', 'step': 12566, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:29.463325', 'step': 12566, 'epoch': 2} {'type': 'loss', 'content': 0.10519382357597351, 'timestamp': '2025-10-01 04:28:29.465329', 'step': 12567, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:29.495989', 'step': 12567, 'epoch': 2} {'type': 'loss', 'content': 0.0795067548751831, 'timestamp': '2025-10-01 04:28:29.519345', 'step': 12568, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:29.549569', 'step': 12568, 'epoch': 2} {'type': 'loss', 'content': 0.17895114421844482, 'timestamp': '2025-10-01 04:28:29.551772', 'step': 12569, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:29.584692', 'step': 12569, 'epoch': 2} {'type': 'loss', 'content': 0.20108692348003387, 'timestamp': '2025-10-01 04:28:29.586547', 'step': 12570, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:29.618378', 'step': 12570, 'epoch': 2} {'type': 'loss', 'content': 0.10727008432149887, 'timestamp': '2025-10-01 04:28:29.620375', 'step': 12571, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:29.652096', 'step': 12571, 'epoch': 2} {'type': 'loss', 'content': 0.08845536410808563, 'timestamp': '2025-10-01 04:28:29.675528', 'step': 12572, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:29.706034', 'step': 12572, 'epoch': 2} {'type': 'loss', 'content': 0.13131918013095856, 'timestamp': '2025-10-01 04:28:29.707908', 'step': 12573, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:28:29.738975', 'step': 12573, 'epoch': 2} {'type': 'loss', 'content': 0.14467161893844604, 'timestamp': '2025-10-01 04:28:29.741882', 'step': 12574, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:28:29.772171', 'step': 12574, 'epoch': 2} {'type': 'loss', 'content': 0.090599425137043, 'timestamp': '2025-10-01 04:28:29.774391', 'step': 12575, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:29.805808', 'step': 12575, 'epoch': 2} {'type': 'loss', 'content': 0.10552658140659332, 'timestamp': '2025-10-01 04:28:29.829608', 'step': 12576, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:28:29.860247', 'step': 12576, 'epoch': 2} {'type': 'loss', 'content': 0.11226991564035416, 'timestamp': '2025-10-01 04:28:29.862575', 'step': 12577, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:29.895367', 'step': 12577, 'epoch': 2} {'type': 'loss', 'content': 0.15863628685474396, 'timestamp': '2025-10-01 04:28:29.897302', 'step': 12578, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:29.928051', 'step': 12578, 'epoch': 2} {'type': 'loss', 'content': 0.10876632481813431, 'timestamp': '2025-10-01 04:28:29.931230', 'step': 12579, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:29.968257', 'step': 12579, 'epoch': 2} {'type': 'loss', 'content': 0.17242039740085602, 'timestamp': '2025-10-01 04:28:29.992083', 'step': 12580, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:30.024300', 'step': 12580, 'epoch': 2} {'type': 'loss', 'content': 0.12338639795780182, 'timestamp': '2025-10-01 04:28:30.026266', 'step': 12581, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:30.059153', 'step': 12581, 'epoch': 2} {'type': 'loss', 'content': 0.1231251135468483, 'timestamp': '2025-10-01 04:28:30.061246', 'step': 12582, 'epoch': 2} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 949202279808}], 'timestamp': '2025-10-01 04:28:38.820525', 'step': 12582, 'epoch': 2} {'type': 'pplx', 'content': 14940.19984230254, 'timestamp': '2025-10-01 04:28:38.823756', 'step': 12582, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:38.854931', 'step': 12582, 'epoch': 2} {'type': 'loss', 'content': 0.05371939390897751, 'timestamp': '2025-10-01 04:28:38.857074', 'step': 12583, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:38.891656', 'step': 12583, 'epoch': 2} {'type': 'loss', 'content': 0.07950633019208908, 'timestamp': '2025-10-01 04:28:38.915827', 'step': 12584, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:38.948036', 'step': 12584, 'epoch': 2} {'type': 'loss', 'content': 0.08416847139596939, 'timestamp': '2025-10-01 04:28:38.949877', 'step': 12585, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:28:38.988923', 'step': 12585, 'epoch': 2} {'type': 'loss', 'content': 0.10352851450443268, 'timestamp': '2025-10-01 04:28:38.991227', 'step': 12586, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:39.022629', 'step': 12586, 'epoch': 2} {'type': 'loss', 'content': 0.07832242548465729, 'timestamp': '2025-10-01 04:28:39.024711', 'step': 12587, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:39.065869', 'step': 12587, 'epoch': 2} {'type': 'loss', 'content': 0.13361960649490356, 'timestamp': '2025-10-01 04:28:39.089569', 'step': 12588, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:39.120812', 'step': 12588, 'epoch': 2} {'type': 'loss', 'content': 0.1118336021900177, 'timestamp': '2025-10-01 04:28:39.122899', 'step': 12589, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:28:39.153340', 'step': 12589, 'epoch': 2} {'type': 'loss', 'content': 0.09975095093250275, 'timestamp': '2025-10-01 04:28:39.155600', 'step': 12590, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:39.187077', 'step': 12590, 'epoch': 2} {'type': 'loss', 'content': 0.0513179786503315, 'timestamp': '2025-10-01 04:28:39.189146', 'step': 12591, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:39.219167', 'step': 12591, 'epoch': 2} {'type': 'loss', 'content': 0.0655609741806984, 'timestamp': '2025-10-01 04:28:39.242815', 'step': 12592, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:39.282138', 'step': 12592, 'epoch': 2} {'type': 'loss', 'content': 0.09770576655864716, 'timestamp': '2025-10-01 04:28:39.284159', 'step': 12593, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:39.315825', 'step': 12593, 'epoch': 2} {'type': 'loss', 'content': 0.08476608991622925, 'timestamp': '2025-10-01 04:28:39.318142', 'step': 12594, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:39.348844', 'step': 12594, 'epoch': 2} {'type': 'loss', 'content': 0.09802442044019699, 'timestamp': '2025-10-01 04:28:39.351228', 'step': 12595, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:39.382306', 'step': 12595, 'epoch': 2} {'type': 'loss', 'content': 0.13941603899002075, 'timestamp': '2025-10-01 04:28:39.405778', 'step': 12596, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:39.437312', 'step': 12596, 'epoch': 2} {'type': 'loss', 'content': 0.07463646680116653, 'timestamp': '2025-10-01 04:28:39.439443', 'step': 12597, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:39.470638', 'step': 12597, 'epoch': 2} {'type': 'loss', 'content': 0.14523597061634064, 'timestamp': '2025-10-01 04:28:39.483507', 'step': 12598, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:39.514812', 'step': 12598, 'epoch': 2} {'type': 'loss', 'content': 0.20291896164417267, 'timestamp': '2025-10-01 04:28:39.517098', 'step': 12599, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:28:39.548195', 'step': 12599, 'epoch': 2} {'type': 'loss', 'content': 0.1264449506998062, 'timestamp': '2025-10-01 04:28:39.571885', 'step': 12600, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:39.612365', 'step': 12600, 'epoch': 2} {'type': 'loss', 'content': 0.09708437323570251, 'timestamp': '2025-10-01 04:28:39.614991', 'step': 12601, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:39.646177', 'step': 12601, 'epoch': 2} {'type': 'loss', 'content': 0.1080993190407753, 'timestamp': '2025-10-01 04:28:39.650376', 'step': 12602, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:39.681816', 'step': 12602, 'epoch': 2} {'type': 'loss', 'content': 0.0533001609146595, 'timestamp': '2025-10-01 04:28:39.684158', 'step': 12603, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:39.715744', 'step': 12603, 'epoch': 2} {'type': 'loss', 'content': 0.10811241716146469, 'timestamp': '2025-10-01 04:28:39.739328', 'step': 12604, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:39.771045', 'step': 12604, 'epoch': 2} {'type': 'loss', 'content': 0.06851185858249664, 'timestamp': '2025-10-01 04:28:39.773234', 'step': 12605, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:39.806478', 'step': 12605, 'epoch': 2} {'type': 'loss', 'content': 0.07122242450714111, 'timestamp': '2025-10-01 04:28:39.808497', 'step': 12606, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:39.839347', 'step': 12606, 'epoch': 2} {'type': 'loss', 'content': 0.08789723366498947, 'timestamp': '2025-10-01 04:28:39.841493', 'step': 12607, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:39.872037', 'step': 12607, 'epoch': 2} {'type': 'loss', 'content': 0.1156458854675293, 'timestamp': '2025-10-01 04:28:39.895615', 'step': 12608, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:39.927894', 'step': 12608, 'epoch': 2} {'type': 'loss', 'content': 0.012865168042480946, 'timestamp': '2025-10-01 04:28:39.929943', 'step': 12609, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:39.966859', 'step': 12609, 'epoch': 2} {'type': 'loss', 'content': 0.13476836681365967, 'timestamp': '2025-10-01 04:28:39.972224', 'step': 12610, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:28:40.006637', 'step': 12610, 'epoch': 2} {'type': 'loss', 'content': 0.0476221889257431, 'timestamp': '2025-10-01 04:28:40.012645', 'step': 12611, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:28:40.049480', 'step': 12611, 'epoch': 2} {'type': 'loss', 'content': 0.09883572161197662, 'timestamp': '2025-10-01 04:28:40.073285', 'step': 12612, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:28:40.106174', 'step': 12612, 'epoch': 2} {'type': 'loss', 'content': 0.12618714570999146, 'timestamp': '2025-10-01 04:28:40.108227', 'step': 12613, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:40.139092', 'step': 12613, 'epoch': 2} {'type': 'loss', 'content': 0.09429700672626495, 'timestamp': '2025-10-01 04:28:40.141269', 'step': 12614, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:28:40.178943', 'step': 12614, 'epoch': 2} {'type': 'loss', 'content': 0.09395655244588852, 'timestamp': '2025-10-01 04:28:40.181034', 'step': 12615, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:40.212704', 'step': 12615, 'epoch': 2} {'type': 'loss', 'content': 0.09756705909967422, 'timestamp': '2025-10-01 04:28:40.236274', 'step': 12616, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:40.271568', 'step': 12616, 'epoch': 2} {'type': 'loss', 'content': 0.09828666597604752, 'timestamp': '2025-10-01 04:28:40.274566', 'step': 12617, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:40.308312', 'step': 12617, 'epoch': 2} {'type': 'loss', 'content': 0.09738495945930481, 'timestamp': '2025-10-01 04:28:40.310673', 'step': 12618, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:40.345514', 'step': 12618, 'epoch': 2} {'type': 'loss', 'content': 0.09957639873027802, 'timestamp': '2025-10-01 04:28:40.347942', 'step': 12619, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:40.380298', 'step': 12619, 'epoch': 2} {'type': 'loss', 'content': 0.1378500908613205, 'timestamp': '2025-10-01 04:28:40.404478', 'step': 12620, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:40.439938', 'step': 12620, 'epoch': 2} {'type': 'loss', 'content': 0.051546718925237656, 'timestamp': '2025-10-01 04:28:40.442409', 'step': 12621, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:40.474095', 'step': 12621, 'epoch': 2} {'type': 'loss', 'content': 0.0988253653049469, 'timestamp': '2025-10-01 04:28:40.482019', 'step': 12622, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:28:40.513546', 'step': 12622, 'epoch': 2} {'type': 'loss', 'content': 0.1373809278011322, 'timestamp': '2025-10-01 04:28:40.525180', 'step': 12623, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:40.556837', 'step': 12623, 'epoch': 2} {'type': 'loss', 'content': 0.14900298416614532, 'timestamp': '2025-10-01 04:28:40.580884', 'step': 12624, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:28:40.612412', 'step': 12624, 'epoch': 2} {'type': 'loss', 'content': 0.11922101676464081, 'timestamp': '2025-10-01 04:28:40.615974', 'step': 12625, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:40.651631', 'step': 12625, 'epoch': 2} {'type': 'loss', 'content': 0.0887974202632904, 'timestamp': '2025-10-01 04:28:40.654856', 'step': 12626, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:40.688039', 'step': 12626, 'epoch': 2} {'type': 'loss', 'content': 0.17815521359443665, 'timestamp': '2025-10-01 04:28:40.690379', 'step': 12627, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:40.734567', 'step': 12627, 'epoch': 2} {'type': 'loss', 'content': 0.06661787629127502, 'timestamp': '2025-10-01 04:28:40.759519', 'step': 12628, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:28:40.801542', 'step': 12628, 'epoch': 2} {'type': 'loss', 'content': 0.17870618402957916, 'timestamp': '2025-10-01 04:28:40.817804', 'step': 12629, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:40.852764', 'step': 12629, 'epoch': 2} {'type': 'loss', 'content': 0.1069268211722374, 'timestamp': '2025-10-01 04:28:40.854934', 'step': 12630, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:40.887979', 'step': 12630, 'epoch': 2} {'type': 'loss', 'content': 0.17478340864181519, 'timestamp': '2025-10-01 04:28:40.894543', 'step': 12631, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:40.925290', 'step': 12631, 'epoch': 2} {'type': 'loss', 'content': 0.1480536311864853, 'timestamp': '2025-10-01 04:28:40.948993', 'step': 12632, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:40.979117', 'step': 12632, 'epoch': 2} {'type': 'loss', 'content': 0.034646082669496536, 'timestamp': '2025-10-01 04:28:40.983032', 'step': 12633, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:41.013528', 'step': 12633, 'epoch': 2} {'type': 'loss', 'content': 0.09492705762386322, 'timestamp': '2025-10-01 04:28:41.015532', 'step': 12634, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:28:41.045863', 'step': 12634, 'epoch': 2} {'type': 'loss', 'content': 0.15634560585021973, 'timestamp': '2025-10-01 04:28:41.048566', 'step': 12635, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:41.079694', 'step': 12635, 'epoch': 2} {'type': 'loss', 'content': 0.06137368083000183, 'timestamp': '2025-10-01 04:28:41.103458', 'step': 12636, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-10-01 04:28:41.135623', 'step': 12636, 'epoch': 2} {'type': 'loss', 'content': 0.08913332223892212, 'timestamp': '2025-10-01 04:28:41.137913', 'step': 12637, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:28:41.168413', 'step': 12637, 'epoch': 2} {'type': 'loss', 'content': 0.045633383095264435, 'timestamp': '2025-10-01 04:28:41.170460', 'step': 12638, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:41.202948', 'step': 12638, 'epoch': 2} {'type': 'loss', 'content': 0.11175932735204697, 'timestamp': '2025-10-01 04:28:41.205061', 'step': 12639, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:41.237404', 'step': 12639, 'epoch': 2} {'type': 'loss', 'content': 0.09835752099752426, 'timestamp': '2025-10-01 04:28:41.261494', 'step': 12640, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:41.292069', 'step': 12640, 'epoch': 2} {'type': 'loss', 'content': 0.10805973410606384, 'timestamp': '2025-10-01 04:28:41.295597', 'step': 12641, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:41.326289', 'step': 12641, 'epoch': 2} {'type': 'loss', 'content': 0.12132773548364639, 'timestamp': '2025-10-01 04:28:41.328579', 'step': 12642, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:41.358244', 'step': 12642, 'epoch': 2} {'type': 'loss', 'content': 0.12358088791370392, 'timestamp': '2025-10-01 04:28:41.360794', 'step': 12643, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:41.394462', 'step': 12643, 'epoch': 2} {'type': 'loss', 'content': 0.12754946947097778, 'timestamp': '2025-10-01 04:28:41.418231', 'step': 12644, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:41.448451', 'step': 12644, 'epoch': 2} {'type': 'loss', 'content': 0.1558924913406372, 'timestamp': '2025-10-01 04:28:41.452038', 'step': 12645, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:41.485295', 'step': 12645, 'epoch': 2} {'type': 'loss', 'content': 0.1361357867717743, 'timestamp': '2025-10-01 04:28:41.487407', 'step': 12646, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:41.518029', 'step': 12646, 'epoch': 2} {'type': 'loss', 'content': 0.06520180404186249, 'timestamp': '2025-10-01 04:28:41.521678', 'step': 12647, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:41.554828', 'step': 12647, 'epoch': 2} {'type': 'loss', 'content': 0.1613210290670395, 'timestamp': '2025-10-01 04:28:41.585140', 'step': 12648, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:41.619684', 'step': 12648, 'epoch': 2} {'type': 'loss', 'content': 0.1397343873977661, 'timestamp': '2025-10-01 04:28:41.621853', 'step': 12649, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:41.653154', 'step': 12649, 'epoch': 2} {'type': 'loss', 'content': 0.08684883266687393, 'timestamp': '2025-10-01 04:28:41.656103', 'step': 12650, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:41.687157', 'step': 12650, 'epoch': 2} {'type': 'loss', 'content': 0.11807297170162201, 'timestamp': '2025-10-01 04:28:41.696607', 'step': 12651, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:41.736803', 'step': 12651, 'epoch': 2} {'type': 'loss', 'content': 0.14142674207687378, 'timestamp': '2025-10-01 04:28:41.760276', 'step': 12652, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:28:41.797508', 'step': 12652, 'epoch': 2} {'type': 'loss', 'content': 0.07752268761396408, 'timestamp': '2025-10-01 04:28:41.799573', 'step': 12653, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:41.832693', 'step': 12653, 'epoch': 2} {'type': 'loss', 'content': 0.10882171243429184, 'timestamp': '2025-10-01 04:28:41.834680', 'step': 12654, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:28:41.866015', 'step': 12654, 'epoch': 2} {'type': 'loss', 'content': 0.0706329494714737, 'timestamp': '2025-10-01 04:28:41.868358', 'step': 12655, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:41.899596', 'step': 12655, 'epoch': 2} {'type': 'loss', 'content': 0.09664265811443329, 'timestamp': '2025-10-01 04:28:41.923930', 'step': 12656, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:28:41.955036', 'step': 12656, 'epoch': 2} {'type': 'loss', 'content': 0.20774485170841217, 'timestamp': '2025-10-01 04:28:41.961696', 'step': 12657, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:42.009364', 'step': 12657, 'epoch': 2} {'type': 'loss', 'content': 0.13583315908908844, 'timestamp': '2025-10-01 04:28:42.013195', 'step': 12658, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:42.058078', 'step': 12658, 'epoch': 2} {'type': 'loss', 'content': 0.08535531908273697, 'timestamp': '2025-10-01 04:28:42.064877', 'step': 12659, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:42.102282', 'step': 12659, 'epoch': 2} {'type': 'loss', 'content': 0.16462647914886475, 'timestamp': '2025-10-01 04:28:42.127432', 'step': 12660, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:42.165601', 'step': 12660, 'epoch': 2} {'type': 'loss', 'content': 0.1862681806087494, 'timestamp': '2025-10-01 04:28:42.170103', 'step': 12661, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:42.208140', 'step': 12661, 'epoch': 2} {'type': 'loss', 'content': 0.13160468637943268, 'timestamp': '2025-10-01 04:28:42.216082', 'step': 12662, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:28:42.250690', 'step': 12662, 'epoch': 2} {'type': 'loss', 'content': 0.10204442590475082, 'timestamp': '2025-10-01 04:28:42.255225', 'step': 12663, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:42.295823', 'step': 12663, 'epoch': 2} {'type': 'loss', 'content': 0.07839225977659225, 'timestamp': '2025-10-01 04:28:42.325449', 'step': 12664, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:42.358168', 'step': 12664, 'epoch': 2} {'type': 'loss', 'content': 0.06857194006443024, 'timestamp': '2025-10-01 04:28:42.361215', 'step': 12665, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:28:42.397524', 'step': 12665, 'epoch': 2} {'type': 'loss', 'content': 0.09440189599990845, 'timestamp': '2025-10-01 04:28:42.417307', 'step': 12666, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:42.448960', 'step': 12666, 'epoch': 2} {'type': 'loss', 'content': 0.08991831541061401, 'timestamp': '2025-10-01 04:28:42.462470', 'step': 12667, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:28:42.501077', 'step': 12667, 'epoch': 2} {'type': 'loss', 'content': 0.10603716969490051, 'timestamp': '2025-10-01 04:28:42.526585', 'step': 12668, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:42.559860', 'step': 12668, 'epoch': 2} {'type': 'loss', 'content': 0.16691383719444275, 'timestamp': '2025-10-01 04:28:42.564898', 'step': 12669, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:42.597628', 'step': 12669, 'epoch': 2} {'type': 'loss', 'content': 0.12329535186290741, 'timestamp': '2025-10-01 04:28:42.600316', 'step': 12670, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:42.642683', 'step': 12670, 'epoch': 2} {'type': 'loss', 'content': 0.043114617466926575, 'timestamp': '2025-10-01 04:28:42.645989', 'step': 12671, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:42.703189', 'step': 12671, 'epoch': 2} {'type': 'loss', 'content': 0.08078598976135254, 'timestamp': '2025-10-01 04:28:42.731551', 'step': 12672, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:42.769802', 'step': 12672, 'epoch': 2} {'type': 'loss', 'content': 0.04471593722701073, 'timestamp': '2025-10-01 04:28:42.775427', 'step': 12673, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:42.814323', 'step': 12673, 'epoch': 2} {'type': 'loss', 'content': 0.16668954491615295, 'timestamp': '2025-10-01 04:28:42.817235', 'step': 12674, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:42.861822', 'step': 12674, 'epoch': 2} {'type': 'loss', 'content': 0.07377773523330688, 'timestamp': '2025-10-01 04:28:42.864642', 'step': 12675, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:42.906553', 'step': 12675, 'epoch': 2} {'type': 'loss', 'content': 0.1015567034482956, 'timestamp': '2025-10-01 04:28:42.941228', 'step': 12676, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:42.984958', 'step': 12676, 'epoch': 2} {'type': 'loss', 'content': 0.0959741622209549, 'timestamp': '2025-10-01 04:28:42.987676', 'step': 12677, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:43.032243', 'step': 12677, 'epoch': 2} {'type': 'loss', 'content': 0.1206270158290863, 'timestamp': '2025-10-01 04:28:43.045741', 'step': 12678, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:43.085229', 'step': 12678, 'epoch': 2} {'type': 'loss', 'content': 0.07613227516412735, 'timestamp': '2025-10-01 04:28:43.093801', 'step': 12679, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:43.134000', 'step': 12679, 'epoch': 2} {'type': 'loss', 'content': 0.12862181663513184, 'timestamp': '2025-10-01 04:28:43.160153', 'step': 12680, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:28:43.191500', 'step': 12680, 'epoch': 2} {'type': 'loss', 'content': 0.2612609565258026, 'timestamp': '2025-10-01 04:28:43.194251', 'step': 12681, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:43.228414', 'step': 12681, 'epoch': 2} {'type': 'loss', 'content': 0.18223540484905243, 'timestamp': '2025-10-01 04:28:43.231213', 'step': 12682, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:43.279679', 'step': 12682, 'epoch': 2} {'type': 'loss', 'content': 0.033022306859493256, 'timestamp': '2025-10-01 04:28:43.282091', 'step': 12683, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:43.312555', 'step': 12683, 'epoch': 2} {'type': 'loss', 'content': 0.0965438038110733, 'timestamp': '2025-10-01 04:28:43.336411', 'step': 12684, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:43.380104', 'step': 12684, 'epoch': 2} {'type': 'loss', 'content': 0.0637483149766922, 'timestamp': '2025-10-01 04:28:43.384181', 'step': 12685, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:43.415135', 'step': 12685, 'epoch': 2} {'type': 'loss', 'content': 0.05887361988425255, 'timestamp': '2025-10-01 04:28:43.417463', 'step': 12686, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:43.452702', 'step': 12686, 'epoch': 2} {'type': 'loss', 'content': 0.13126212358474731, 'timestamp': '2025-10-01 04:28:43.458783', 'step': 12687, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:43.502666', 'step': 12687, 'epoch': 2} {'type': 'loss', 'content': 0.07397398352622986, 'timestamp': '2025-10-01 04:28:43.527029', 'step': 12688, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:43.575449', 'step': 12688, 'epoch': 2} {'type': 'loss', 'content': 0.17035748064517975, 'timestamp': '2025-10-01 04:28:43.577731', 'step': 12689, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:43.608991', 'step': 12689, 'epoch': 2} {'type': 'loss', 'content': 0.1348380446434021, 'timestamp': '2025-10-01 04:28:43.610989', 'step': 12690, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:43.643314', 'step': 12690, 'epoch': 2} {'type': 'loss', 'content': 0.03626953065395355, 'timestamp': '2025-10-01 04:28:43.646022', 'step': 12691, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:28:43.679080', 'step': 12691, 'epoch': 2} {'type': 'loss', 'content': 0.06707527488470078, 'timestamp': '2025-10-01 04:28:43.702972', 'step': 12692, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:43.748725', 'step': 12692, 'epoch': 2} {'type': 'loss', 'content': 0.045731838792562485, 'timestamp': '2025-10-01 04:28:43.751403', 'step': 12693, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:43.784259', 'step': 12693, 'epoch': 2} {'type': 'loss', 'content': 0.19528469443321228, 'timestamp': '2025-10-01 04:28:43.786728', 'step': 12694, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:43.819501', 'step': 12694, 'epoch': 2} {'type': 'loss', 'content': 0.027632152661681175, 'timestamp': '2025-10-01 04:28:43.822996', 'step': 12695, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:43.854576', 'step': 12695, 'epoch': 2} {'type': 'loss', 'content': 0.0933738574385643, 'timestamp': '2025-10-01 04:28:43.878892', 'step': 12696, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:43.910517', 'step': 12696, 'epoch': 2} {'type': 'loss', 'content': 0.05588959902524948, 'timestamp': '2025-10-01 04:28:43.912802', 'step': 12697, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:28:43.944045', 'step': 12697, 'epoch': 2} {'type': 'loss', 'content': 0.16722455620765686, 'timestamp': '2025-10-01 04:28:43.946361', 'step': 12698, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:43.978692', 'step': 12698, 'epoch': 2} {'type': 'loss', 'content': 0.1293763816356659, 'timestamp': '2025-10-01 04:28:43.981424', 'step': 12699, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:44.018235', 'step': 12699, 'epoch': 2} {'type': 'loss', 'content': 0.13917097449302673, 'timestamp': '2025-10-01 04:28:44.042563', 'step': 12700, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:44.076205', 'step': 12700, 'epoch': 2} {'type': 'loss', 'content': 0.068597212433815, 'timestamp': '2025-10-01 04:28:44.078645', 'step': 12701, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:44.110821', 'step': 12701, 'epoch': 2} {'type': 'loss', 'content': 0.09125204384326935, 'timestamp': '2025-10-01 04:28:44.113015', 'step': 12702, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:44.144626', 'step': 12702, 'epoch': 2} {'type': 'loss', 'content': 0.08018755912780762, 'timestamp': '2025-10-01 04:28:44.147935', 'step': 12703, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:28:44.178533', 'step': 12703, 'epoch': 2} {'type': 'loss', 'content': 0.13391979038715363, 'timestamp': '2025-10-01 04:28:44.202440', 'step': 12704, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:44.234283', 'step': 12704, 'epoch': 2} {'type': 'loss', 'content': 0.039429835975170135, 'timestamp': '2025-10-01 04:28:44.243730', 'step': 12705, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:44.281616', 'step': 12705, 'epoch': 2} {'type': 'loss', 'content': 0.10110333561897278, 'timestamp': '2025-10-01 04:28:44.284069', 'step': 12706, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:44.314908', 'step': 12706, 'epoch': 2} {'type': 'loss', 'content': 0.12041056901216507, 'timestamp': '2025-10-01 04:28:44.324969', 'step': 12707, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:28:44.357410', 'step': 12707, 'epoch': 2} {'type': 'loss', 'content': 0.09799028187990189, 'timestamp': '2025-10-01 04:28:44.381311', 'step': 12708, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:44.413578', 'step': 12708, 'epoch': 2} {'type': 'loss', 'content': 0.04182666167616844, 'timestamp': '2025-10-01 04:28:44.416149', 'step': 12709, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:44.448735', 'step': 12709, 'epoch': 2} {'type': 'loss', 'content': 0.1340230107307434, 'timestamp': '2025-10-01 04:28:44.451769', 'step': 12710, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:44.488174', 'step': 12710, 'epoch': 2} {'type': 'loss', 'content': 0.07473747432231903, 'timestamp': '2025-10-01 04:28:44.490336', 'step': 12711, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:44.540469', 'step': 12711, 'epoch': 2} {'type': 'loss', 'content': 0.0948282852768898, 'timestamp': '2025-10-01 04:28:44.564124', 'step': 12712, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:44.607171', 'step': 12712, 'epoch': 2} {'type': 'loss', 'content': 0.12902916967868805, 'timestamp': '2025-10-01 04:28:44.611004', 'step': 12713, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:44.648921', 'step': 12713, 'epoch': 2} {'type': 'loss', 'content': 0.15576137602329254, 'timestamp': '2025-10-01 04:28:44.655595', 'step': 12714, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:44.695626', 'step': 12714, 'epoch': 2} {'type': 'loss', 'content': 0.16695541143417358, 'timestamp': '2025-10-01 04:28:44.699458', 'step': 12715, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:44.731151', 'step': 12715, 'epoch': 2} {'type': 'loss', 'content': 0.10978350788354874, 'timestamp': '2025-10-01 04:28:44.755464', 'step': 12716, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:28:44.794182', 'step': 12716, 'epoch': 2} {'type': 'loss', 'content': 0.10317254811525345, 'timestamp': '2025-10-01 04:28:44.798584', 'step': 12717, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:44.838168', 'step': 12717, 'epoch': 2} {'type': 'loss', 'content': 0.08371306955814362, 'timestamp': '2025-10-01 04:28:44.840820', 'step': 12718, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:44.873150', 'step': 12718, 'epoch': 2} {'type': 'loss', 'content': 0.03856104984879494, 'timestamp': '2025-10-01 04:28:44.875728', 'step': 12719, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:44.915556', 'step': 12719, 'epoch': 2} {'type': 'loss', 'content': 0.049678996205329895, 'timestamp': '2025-10-01 04:28:44.939796', 'step': 12720, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:28:44.978071', 'step': 12720, 'epoch': 2} {'type': 'loss', 'content': 0.05132970213890076, 'timestamp': '2025-10-01 04:28:44.983638', 'step': 12721, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:45.024339', 'step': 12721, 'epoch': 2} {'type': 'loss', 'content': 0.060632359236478806, 'timestamp': '2025-10-01 04:28:45.026436', 'step': 12722, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:45.086510', 'step': 12722, 'epoch': 2} {'type': 'loss', 'content': 0.045590486377477646, 'timestamp': '2025-10-01 04:28:45.089635', 'step': 12723, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:45.121990', 'step': 12723, 'epoch': 2} {'type': 'loss', 'content': 0.05421021953225136, 'timestamp': '2025-10-01 04:28:45.146103', 'step': 12724, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:45.184508', 'step': 12724, 'epoch': 2} {'type': 'loss', 'content': 0.17973390221595764, 'timestamp': '2025-10-01 04:28:45.186695', 'step': 12725, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:45.223730', 'step': 12725, 'epoch': 2} {'type': 'loss', 'content': 0.1139223724603653, 'timestamp': '2025-10-01 04:28:45.226580', 'step': 12726, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:28:45.276542', 'step': 12726, 'epoch': 2} {'type': 'loss', 'content': 0.11652376502752304, 'timestamp': '2025-10-01 04:28:45.278963', 'step': 12727, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:45.313047', 'step': 12727, 'epoch': 2} {'type': 'loss', 'content': 0.032760508358478546, 'timestamp': '2025-10-01 04:28:45.344753', 'step': 12728, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:28:45.417772', 'step': 12728, 'epoch': 2} {'type': 'loss', 'content': 0.12572510540485382, 'timestamp': '2025-10-01 04:28:45.420083', 'step': 12729, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:45.462853', 'step': 12729, 'epoch': 2} {'type': 'loss', 'content': 0.10194893926382065, 'timestamp': '2025-10-01 04:28:45.465119', 'step': 12730, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:45.513132', 'step': 12730, 'epoch': 2} {'type': 'loss', 'content': 0.13719525933265686, 'timestamp': '2025-10-01 04:28:45.524561', 'step': 12731, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:45.557925', 'step': 12731, 'epoch': 2} {'type': 'loss', 'content': 0.040967218577861786, 'timestamp': '2025-10-01 04:28:45.581738', 'step': 12732, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:28:45.614314', 'step': 12732, 'epoch': 2} {'type': 'loss', 'content': 0.17709875106811523, 'timestamp': '2025-10-01 04:28:45.616970', 'step': 12733, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:45.647876', 'step': 12733, 'epoch': 2} {'type': 'loss', 'content': 0.1900942474603653, 'timestamp': '2025-10-01 04:28:45.649882', 'step': 12734, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:28:45.681385', 'step': 12734, 'epoch': 2} {'type': 'loss', 'content': 0.1774400919675827, 'timestamp': '2025-10-01 04:28:45.693276', 'step': 12735, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:45.726294', 'step': 12735, 'epoch': 2} {'type': 'loss', 'content': 0.11145253479480743, 'timestamp': '2025-10-01 04:28:45.750003', 'step': 12736, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:45.785685', 'step': 12736, 'epoch': 2} {'type': 'loss', 'content': 0.044785041362047195, 'timestamp': '2025-10-01 04:28:45.796312', 'step': 12737, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:45.832756', 'step': 12737, 'epoch': 2} {'type': 'loss', 'content': 0.10204347223043442, 'timestamp': '2025-10-01 04:28:45.834852', 'step': 12738, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:45.868916', 'step': 12738, 'epoch': 2} {'type': 'loss', 'content': 0.18980546295642853, 'timestamp': '2025-10-01 04:28:45.871004', 'step': 12739, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:45.911028', 'step': 12739, 'epoch': 2} {'type': 'loss', 'content': 0.06764104217290878, 'timestamp': '2025-10-01 04:28:45.934856', 'step': 12740, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:45.966874', 'step': 12740, 'epoch': 2} {'type': 'loss', 'content': 0.15701985359191895, 'timestamp': '2025-10-01 04:28:45.969633', 'step': 12741, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:28:46.004391', 'step': 12741, 'epoch': 2} {'type': 'loss', 'content': 0.15793539583683014, 'timestamp': '2025-10-01 04:28:46.006815', 'step': 12742, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:28:46.037877', 'step': 12742, 'epoch': 2} {'type': 'loss', 'content': 0.10187277942895889, 'timestamp': '2025-10-01 04:28:46.040217', 'step': 12743, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:46.072859', 'step': 12743, 'epoch': 2} {'type': 'loss', 'content': 0.07805012166500092, 'timestamp': '2025-10-01 04:28:46.096652', 'step': 12744, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:46.127461', 'step': 12744, 'epoch': 2} {'type': 'loss', 'content': 0.15500663220882416, 'timestamp': '2025-10-01 04:28:46.129577', 'step': 12745, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:28:46.161222', 'step': 12745, 'epoch': 2} {'type': 'loss', 'content': 0.2466740757226944, 'timestamp': '2025-10-01 04:28:46.163356', 'step': 12746, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:28:46.193974', 'step': 12746, 'epoch': 2} {'type': 'loss', 'content': 0.08824288845062256, 'timestamp': '2025-10-01 04:28:46.196565', 'step': 12747, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:46.231613', 'step': 12747, 'epoch': 2} {'type': 'loss', 'content': 0.07911863178014755, 'timestamp': '2025-10-01 04:28:46.255253', 'step': 12748, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:46.290213', 'step': 12748, 'epoch': 2} {'type': 'loss', 'content': 0.14443884789943695, 'timestamp': '2025-10-01 04:28:46.292235', 'step': 12749, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:46.324064', 'step': 12749, 'epoch': 2} {'type': 'loss', 'content': 0.1051320731639862, 'timestamp': '2025-10-01 04:28:46.326219', 'step': 12750, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:28:46.360913', 'step': 12750, 'epoch': 2} {'type': 'loss', 'content': 0.16275779902935028, 'timestamp': '2025-10-01 04:28:46.363182', 'step': 12751, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:46.398399', 'step': 12751, 'epoch': 2} {'type': 'loss', 'content': 0.11581560969352722, 'timestamp': '2025-10-01 04:28:46.421885', 'step': 12752, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:46.459794', 'step': 12752, 'epoch': 2} {'type': 'loss', 'content': 0.19093838334083557, 'timestamp': '2025-10-01 04:28:46.461900', 'step': 12753, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:46.497884', 'step': 12753, 'epoch': 2} {'type': 'loss', 'content': 0.06056743487715721, 'timestamp': '2025-10-01 04:28:46.499771', 'step': 12754, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:46.532869', 'step': 12754, 'epoch': 2} {'type': 'loss', 'content': 0.07968132197856903, 'timestamp': '2025-10-01 04:28:46.536356', 'step': 12755, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:28:46.568052', 'step': 12755, 'epoch': 2} {'type': 'loss', 'content': 0.0967143252491951, 'timestamp': '2025-10-01 04:28:46.591789', 'step': 12756, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:46.623692', 'step': 12756, 'epoch': 2} {'type': 'loss', 'content': 0.13883210718631744, 'timestamp': '2025-10-01 04:28:46.625964', 'step': 12757, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:46.658585', 'step': 12757, 'epoch': 2} {'type': 'loss', 'content': 0.1316637247800827, 'timestamp': '2025-10-01 04:28:46.660831', 'step': 12758, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:46.701724', 'step': 12758, 'epoch': 2} {'type': 'loss', 'content': 0.1355995088815689, 'timestamp': '2025-10-01 04:28:46.703853', 'step': 12759, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:46.741033', 'step': 12759, 'epoch': 2} {'type': 'loss', 'content': 0.11603023111820221, 'timestamp': '2025-10-01 04:28:46.764801', 'step': 12760, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:46.803398', 'step': 12760, 'epoch': 2} {'type': 'loss', 'content': 0.1048036739230156, 'timestamp': '2025-10-01 04:28:46.805392', 'step': 12761, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:28:46.837552', 'step': 12761, 'epoch': 2} {'type': 'loss', 'content': 0.10370724648237228, 'timestamp': '2025-10-01 04:28:46.840007', 'step': 12762, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:46.870681', 'step': 12762, 'epoch': 2} {'type': 'loss', 'content': 0.16264288127422333, 'timestamp': '2025-10-01 04:28:46.873291', 'step': 12763, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:46.906355', 'step': 12763, 'epoch': 2} {'type': 'loss', 'content': 0.03893480449914932, 'timestamp': '2025-10-01 04:28:46.930016', 'step': 12764, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:46.960810', 'step': 12764, 'epoch': 2} {'type': 'loss', 'content': 0.05537255480885506, 'timestamp': '2025-10-01 04:28:46.963089', 'step': 12765, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:46.994144', 'step': 12765, 'epoch': 2} {'type': 'loss', 'content': 0.10856049507856369, 'timestamp': '2025-10-01 04:28:46.996114', 'step': 12766, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:47.035417', 'step': 12766, 'epoch': 2} {'type': 'loss', 'content': 0.11300349235534668, 'timestamp': '2025-10-01 04:28:47.037455', 'step': 12767, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:47.068685', 'step': 12767, 'epoch': 2} {'type': 'loss', 'content': 0.11335543543100357, 'timestamp': '2025-10-01 04:28:47.092347', 'step': 12768, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:47.124323', 'step': 12768, 'epoch': 2} {'type': 'loss', 'content': 0.1126401424407959, 'timestamp': '2025-10-01 04:28:47.130825', 'step': 12769, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:28:47.164801', 'step': 12769, 'epoch': 2} {'type': 'loss', 'content': 0.10012304782867432, 'timestamp': '2025-10-01 04:28:47.167129', 'step': 12770, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:47.198681', 'step': 12770, 'epoch': 2} {'type': 'loss', 'content': 0.0533866360783577, 'timestamp': '2025-10-01 04:28:47.201067', 'step': 12771, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:47.232550', 'step': 12771, 'epoch': 2} {'type': 'loss', 'content': 0.0770842581987381, 'timestamp': '2025-10-01 04:28:47.256360', 'step': 12772, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:47.295859', 'step': 12772, 'epoch': 2} {'type': 'loss', 'content': 0.1473734974861145, 'timestamp': '2025-10-01 04:28:47.298158', 'step': 12773, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:47.329374', 'step': 12773, 'epoch': 2} {'type': 'loss', 'content': 0.2677096426486969, 'timestamp': '2025-10-01 04:28:47.331425', 'step': 12774, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:47.361830', 'step': 12774, 'epoch': 2} {'type': 'loss', 'content': 0.10417337715625763, 'timestamp': '2025-10-01 04:28:47.363889', 'step': 12775, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:47.396665', 'step': 12775, 'epoch': 2} {'type': 'loss', 'content': 0.14393417537212372, 'timestamp': '2025-10-01 04:28:47.420607', 'step': 12776, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:28:47.452284', 'step': 12776, 'epoch': 2} {'type': 'loss', 'content': 0.08935491740703583, 'timestamp': '2025-10-01 04:28:47.454346', 'step': 12777, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:47.505023', 'step': 12777, 'epoch': 2} {'type': 'loss', 'content': 0.059453465044498444, 'timestamp': '2025-10-01 04:28:47.507001', 'step': 12778, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:47.537686', 'step': 12778, 'epoch': 2} {'type': 'loss', 'content': 0.07761935144662857, 'timestamp': '2025-10-01 04:28:47.540539', 'step': 12779, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:47.585321', 'step': 12779, 'epoch': 2} {'type': 'loss', 'content': 0.12263896316289902, 'timestamp': '2025-10-01 04:28:47.608926', 'step': 12780, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:47.640087', 'step': 12780, 'epoch': 2} {'type': 'loss', 'content': 0.09468568861484528, 'timestamp': '2025-10-01 04:28:47.642656', 'step': 12781, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:28:47.673590', 'step': 12781, 'epoch': 2} {'type': 'loss', 'content': 0.06614089012145996, 'timestamp': '2025-10-01 04:28:47.675965', 'step': 12782, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:47.707021', 'step': 12782, 'epoch': 2} {'type': 'loss', 'content': 0.10903213918209076, 'timestamp': '2025-10-01 04:28:47.709213', 'step': 12783, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:47.740454', 'step': 12783, 'epoch': 2} {'type': 'loss', 'content': 0.19998669624328613, 'timestamp': '2025-10-01 04:28:47.764034', 'step': 12784, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:47.803526', 'step': 12784, 'epoch': 2} {'type': 'loss', 'content': 0.11322342604398727, 'timestamp': '2025-10-01 04:28:47.805519', 'step': 12785, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:47.835800', 'step': 12785, 'epoch': 2} {'type': 'loss', 'content': 0.08254764974117279, 'timestamp': '2025-10-01 04:28:47.838117', 'step': 12786, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:47.868869', 'step': 12786, 'epoch': 2} {'type': 'loss', 'content': 0.05996480584144592, 'timestamp': '2025-10-01 04:28:47.871288', 'step': 12787, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:47.902402', 'step': 12787, 'epoch': 2} {'type': 'loss', 'content': 0.08698727935552597, 'timestamp': '2025-10-01 04:28:47.925968', 'step': 12788, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:47.956616', 'step': 12788, 'epoch': 2} {'type': 'loss', 'content': 0.09562304615974426, 'timestamp': '2025-10-01 04:28:47.960236', 'step': 12789, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-10-01 04:28:47.992522', 'step': 12789, 'epoch': 2} {'type': 'loss', 'content': 0.081397145986557, 'timestamp': '2025-10-01 04:28:48.005072', 'step': 12790, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:48.037274', 'step': 12790, 'epoch': 2} {'type': 'loss', 'content': 0.10907980799674988, 'timestamp': '2025-10-01 04:28:48.039482', 'step': 12791, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:48.069816', 'step': 12791, 'epoch': 2} {'type': 'loss', 'content': 0.17277942597866058, 'timestamp': '2025-10-01 04:28:48.093351', 'step': 12792, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:48.124676', 'step': 12792, 'epoch': 2} {'type': 'loss', 'content': 0.1294640153646469, 'timestamp': '2025-10-01 04:28:48.126694', 'step': 12793, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:48.157218', 'step': 12793, 'epoch': 2} {'type': 'loss', 'content': 0.050427328795194626, 'timestamp': '2025-10-01 04:28:48.159685', 'step': 12794, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:48.189715', 'step': 12794, 'epoch': 2} {'type': 'loss', 'content': 0.16489030420780182, 'timestamp': '2025-10-01 04:28:48.191844', 'step': 12795, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:48.221673', 'step': 12795, 'epoch': 2} {'type': 'loss', 'content': 0.11537937074899673, 'timestamp': '2025-10-01 04:28:48.245446', 'step': 12796, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:48.275875', 'step': 12796, 'epoch': 2} {'type': 'loss', 'content': 0.11025892943143845, 'timestamp': '2025-10-01 04:28:48.278120', 'step': 12797, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:48.315623', 'step': 12797, 'epoch': 2} {'type': 'loss', 'content': 0.12815435230731964, 'timestamp': '2025-10-01 04:28:48.317751', 'step': 12798, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:28:48.347938', 'step': 12798, 'epoch': 2} {'type': 'loss', 'content': 0.13479551672935486, 'timestamp': '2025-10-01 04:28:48.350559', 'step': 12799, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:48.381594', 'step': 12799, 'epoch': 2} {'type': 'loss', 'content': 0.0822685956954956, 'timestamp': '2025-10-01 04:28:48.407499', 'step': 12800, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:48.437799', 'step': 12800, 'epoch': 2} {'type': 'loss', 'content': 0.04673434793949127, 'timestamp': '2025-10-01 04:28:48.440579', 'step': 12801, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:48.471057', 'step': 12801, 'epoch': 2} {'type': 'loss', 'content': 0.008092728443443775, 'timestamp': '2025-10-01 04:28:48.473228', 'step': 12802, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:48.503623', 'step': 12802, 'epoch': 2} {'type': 'loss', 'content': 0.1626753956079483, 'timestamp': '2025-10-01 04:28:48.506002', 'step': 12803, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:48.549334', 'step': 12803, 'epoch': 2} {'type': 'loss', 'content': 0.1311960369348526, 'timestamp': '2025-10-01 04:28:48.572929', 'step': 12804, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:48.603169', 'step': 12804, 'epoch': 2} {'type': 'loss', 'content': 0.09189765155315399, 'timestamp': '2025-10-01 04:28:48.605261', 'step': 12805, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:48.636481', 'step': 12805, 'epoch': 2} {'type': 'loss', 'content': 0.11631668359041214, 'timestamp': '2025-10-01 04:28:48.638805', 'step': 12806, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:48.677129', 'step': 12806, 'epoch': 2} {'type': 'loss', 'content': 0.1608497053384781, 'timestamp': '2025-10-01 04:28:48.679986', 'step': 12807, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:48.710755', 'step': 12807, 'epoch': 2} {'type': 'loss', 'content': 0.12260616570711136, 'timestamp': '2025-10-01 04:28:48.734769', 'step': 12808, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:48.766505', 'step': 12808, 'epoch': 2} {'type': 'loss', 'content': 0.12551146745681763, 'timestamp': '2025-10-01 04:28:48.768527', 'step': 12809, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:28:48.799396', 'step': 12809, 'epoch': 2} {'type': 'loss', 'content': 0.17023693025112152, 'timestamp': '2025-10-01 04:28:48.801839', 'step': 12810, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:48.841405', 'step': 12810, 'epoch': 2} {'type': 'loss', 'content': 0.12618453800678253, 'timestamp': '2025-10-01 04:28:48.843439', 'step': 12811, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:48.877839', 'step': 12811, 'epoch': 2} {'type': 'loss', 'content': 0.17374318838119507, 'timestamp': '2025-10-01 04:28:48.901580', 'step': 12812, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:48.931568', 'step': 12812, 'epoch': 2} {'type': 'loss', 'content': 0.05004030838608742, 'timestamp': '2025-10-01 04:28:48.942403', 'step': 12813, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:28:48.974056', 'step': 12813, 'epoch': 2} {'type': 'loss', 'content': 0.072092704474926, 'timestamp': '2025-10-01 04:28:48.976879', 'step': 12814, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:49.010177', 'step': 12814, 'epoch': 2} {'type': 'loss', 'content': 0.13737963140010834, 'timestamp': '2025-10-01 04:28:49.012472', 'step': 12815, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:49.042710', 'step': 12815, 'epoch': 2} {'type': 'loss', 'content': 0.13271842896938324, 'timestamp': '2025-10-01 04:28:49.071538', 'step': 12816, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:49.106917', 'step': 12816, 'epoch': 2} {'type': 'loss', 'content': 0.12190140038728714, 'timestamp': '2025-10-01 04:28:49.108918', 'step': 12817, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:28:49.139295', 'step': 12817, 'epoch': 2} {'type': 'loss', 'content': 0.07886926084756851, 'timestamp': '2025-10-01 04:28:49.141591', 'step': 12818, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:49.172391', 'step': 12818, 'epoch': 2} {'type': 'loss', 'content': 0.04632555693387985, 'timestamp': '2025-10-01 04:28:49.174385', 'step': 12819, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:49.209985', 'step': 12819, 'epoch': 2} {'type': 'loss', 'content': 0.10488291829824448, 'timestamp': '2025-10-01 04:28:49.234237', 'step': 12820, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:49.265149', 'step': 12820, 'epoch': 2} {'type': 'loss', 'content': 0.09095322340726852, 'timestamp': '2025-10-01 04:28:49.267196', 'step': 12821, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:49.299678', 'step': 12821, 'epoch': 2} {'type': 'loss', 'content': 0.08099028468132019, 'timestamp': '2025-10-01 04:28:49.302294', 'step': 12822, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:49.332400', 'step': 12822, 'epoch': 2} {'type': 'loss', 'content': 0.10829108208417892, 'timestamp': '2025-10-01 04:28:49.338170', 'step': 12823, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:49.375911', 'step': 12823, 'epoch': 2} {'type': 'loss', 'content': 0.10019617527723312, 'timestamp': '2025-10-01 04:28:49.400239', 'step': 12824, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:28:49.431898', 'step': 12824, 'epoch': 2} {'type': 'loss', 'content': 0.08495455235242844, 'timestamp': '2025-10-01 04:28:49.434571', 'step': 12825, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:49.466324', 'step': 12825, 'epoch': 2} {'type': 'loss', 'content': 0.12593567371368408, 'timestamp': '2025-10-01 04:28:49.469530', 'step': 12826, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:49.500078', 'step': 12826, 'epoch': 2} {'type': 'loss', 'content': 0.055721115320920944, 'timestamp': '2025-10-01 04:28:49.502121', 'step': 12827, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:49.533101', 'step': 12827, 'epoch': 2} {'type': 'loss', 'content': 0.05742402747273445, 'timestamp': '2025-10-01 04:28:49.557501', 'step': 12828, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:49.595357', 'step': 12828, 'epoch': 2} {'type': 'loss', 'content': 0.10701140016317368, 'timestamp': '2025-10-01 04:28:49.598180', 'step': 12829, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:49.629207', 'step': 12829, 'epoch': 2} {'type': 'loss', 'content': 0.09151933342218399, 'timestamp': '2025-10-01 04:28:49.641583', 'step': 12830, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:28:49.675241', 'step': 12830, 'epoch': 2} {'type': 'loss', 'content': 0.0907365083694458, 'timestamp': '2025-10-01 04:28:49.677816', 'step': 12831, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:49.708355', 'step': 12831, 'epoch': 2} {'type': 'loss', 'content': 0.14759443700313568, 'timestamp': '2025-10-01 04:28:49.731916', 'step': 12832, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:28:49.762192', 'step': 12832, 'epoch': 2} {'type': 'loss', 'content': 0.06764402240514755, 'timestamp': '2025-10-01 04:28:49.764395', 'step': 12833, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:49.798479', 'step': 12833, 'epoch': 2} {'type': 'loss', 'content': 0.0632888674736023, 'timestamp': '2025-10-01 04:28:49.807563', 'step': 12834, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:49.838841', 'step': 12834, 'epoch': 2} {'type': 'loss', 'content': 0.06255963444709778, 'timestamp': '2025-10-01 04:28:49.841242', 'step': 12835, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:49.876408', 'step': 12835, 'epoch': 2} {'type': 'loss', 'content': 0.06976202130317688, 'timestamp': '2025-10-01 04:28:49.901189', 'step': 12836, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:49.934394', 'step': 12836, 'epoch': 2} {'type': 'loss', 'content': 0.07145772129297256, 'timestamp': '2025-10-01 04:28:49.936958', 'step': 12837, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:49.969042', 'step': 12837, 'epoch': 2} {'type': 'loss', 'content': 0.06679795682430267, 'timestamp': '2025-10-01 04:28:49.971399', 'step': 12838, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:50.013040', 'step': 12838, 'epoch': 2} {'type': 'loss', 'content': 0.15277425944805145, 'timestamp': '2025-10-01 04:28:50.015825', 'step': 12839, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:50.052620', 'step': 12839, 'epoch': 2} {'type': 'loss', 'content': 0.12504422664642334, 'timestamp': '2025-10-01 04:28:50.076609', 'step': 12840, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:28:50.113302', 'step': 12840, 'epoch': 2} {'type': 'loss', 'content': 0.09289108961820602, 'timestamp': '2025-10-01 04:28:50.116887', 'step': 12841, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:50.148104', 'step': 12841, 'epoch': 2} {'type': 'loss', 'content': 0.0786445364356041, 'timestamp': '2025-10-01 04:28:50.150550', 'step': 12842, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:50.183418', 'step': 12842, 'epoch': 2} {'type': 'loss', 'content': 0.049582339823246, 'timestamp': '2025-10-01 04:28:50.185625', 'step': 12843, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:50.216289', 'step': 12843, 'epoch': 2} {'type': 'loss', 'content': 0.10930562019348145, 'timestamp': '2025-10-01 04:28:50.240022', 'step': 12844, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:50.270850', 'step': 12844, 'epoch': 2} {'type': 'loss', 'content': 0.0923728197813034, 'timestamp': '2025-10-01 04:28:50.275961', 'step': 12845, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:50.306771', 'step': 12845, 'epoch': 2} {'type': 'loss', 'content': 0.05772819370031357, 'timestamp': '2025-10-01 04:28:50.309437', 'step': 12846, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:50.339658', 'step': 12846, 'epoch': 2} {'type': 'loss', 'content': 0.08778584003448486, 'timestamp': '2025-10-01 04:28:50.342141', 'step': 12847, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:28:50.373027', 'step': 12847, 'epoch': 2} {'type': 'loss', 'content': 0.1458195000886917, 'timestamp': '2025-10-01 04:28:50.401471', 'step': 12848, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:50.433795', 'step': 12848, 'epoch': 2} {'type': 'loss', 'content': 0.10287732630968094, 'timestamp': '2025-10-01 04:28:50.435877', 'step': 12849, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:50.466617', 'step': 12849, 'epoch': 2} {'type': 'loss', 'content': 0.1842920184135437, 'timestamp': '2025-10-01 04:28:50.469108', 'step': 12850, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:50.500257', 'step': 12850, 'epoch': 2} {'type': 'loss', 'content': 0.07222586870193481, 'timestamp': '2025-10-01 04:28:50.502639', 'step': 12851, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:50.533144', 'step': 12851, 'epoch': 2} {'type': 'loss', 'content': 0.19946777820587158, 'timestamp': '2025-10-01 04:28:50.556671', 'step': 12852, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:50.601677', 'step': 12852, 'epoch': 2} {'type': 'loss', 'content': 0.05104362219572067, 'timestamp': '2025-10-01 04:28:50.603720', 'step': 12853, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:50.634024', 'step': 12853, 'epoch': 2} {'type': 'loss', 'content': 0.09332253038883209, 'timestamp': '2025-10-01 04:28:50.636062', 'step': 12854, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:50.666441', 'step': 12854, 'epoch': 2} {'type': 'loss', 'content': 0.12892168760299683, 'timestamp': '2025-10-01 04:28:50.668750', 'step': 12855, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:50.699390', 'step': 12855, 'epoch': 2} {'type': 'loss', 'content': 0.12728789448738098, 'timestamp': '2025-10-01 04:28:50.723270', 'step': 12856, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:50.754365', 'step': 12856, 'epoch': 2} {'type': 'loss', 'content': 0.09589079022407532, 'timestamp': '2025-10-01 04:28:50.757052', 'step': 12857, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:50.788401', 'step': 12857, 'epoch': 2} {'type': 'loss', 'content': 0.16101336479187012, 'timestamp': '2025-10-01 04:28:50.791330', 'step': 12858, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:50.823426', 'step': 12858, 'epoch': 2} {'type': 'loss', 'content': 0.15003176033496857, 'timestamp': '2025-10-01 04:28:50.826058', 'step': 12859, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:50.856888', 'step': 12859, 'epoch': 2} {'type': 'loss', 'content': 0.07842091470956802, 'timestamp': '2025-10-01 04:28:50.881120', 'step': 12860, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:50.912434', 'step': 12860, 'epoch': 2} {'type': 'loss', 'content': 0.06498195976018906, 'timestamp': '2025-10-01 04:28:50.915084', 'step': 12861, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:50.945731', 'step': 12861, 'epoch': 2} {'type': 'loss', 'content': 0.0880357176065445, 'timestamp': '2025-10-01 04:28:50.948174', 'step': 12862, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:28:50.979050', 'step': 12862, 'epoch': 2} {'type': 'loss', 'content': 0.10498078167438507, 'timestamp': '2025-10-01 04:28:50.981714', 'step': 12863, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:51.013253', 'step': 12863, 'epoch': 2} {'type': 'loss', 'content': 0.05605330318212509, 'timestamp': '2025-10-01 04:28:51.037240', 'step': 12864, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:51.068070', 'step': 12864, 'epoch': 2} {'type': 'loss', 'content': 0.04929458349943161, 'timestamp': '2025-10-01 04:28:51.079055', 'step': 12865, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:51.110185', 'step': 12865, 'epoch': 2} {'type': 'loss', 'content': 0.11833884567022324, 'timestamp': '2025-10-01 04:28:51.112233', 'step': 12866, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:51.143173', 'step': 12866, 'epoch': 2} {'type': 'loss', 'content': 0.10489235073328018, 'timestamp': '2025-10-01 04:28:51.145562', 'step': 12867, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:51.175971', 'step': 12867, 'epoch': 2} {'type': 'loss', 'content': 0.09976834803819656, 'timestamp': '2025-10-01 04:28:51.199557', 'step': 12868, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:51.230564', 'step': 12868, 'epoch': 2} {'type': 'loss', 'content': 0.06809424608945847, 'timestamp': '2025-10-01 04:28:51.232651', 'step': 12869, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:51.263300', 'step': 12869, 'epoch': 2} {'type': 'loss', 'content': 0.08994871377944946, 'timestamp': '2025-10-01 04:28:51.265586', 'step': 12870, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:51.297355', 'step': 12870, 'epoch': 2} {'type': 'loss', 'content': 0.11980582773685455, 'timestamp': '2025-10-01 04:28:51.299544', 'step': 12871, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:28:51.330568', 'step': 12871, 'epoch': 2} {'type': 'loss', 'content': 0.15223455429077148, 'timestamp': '2025-10-01 04:28:51.354538', 'step': 12872, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:51.395374', 'step': 12872, 'epoch': 2} {'type': 'loss', 'content': 0.06171242520213127, 'timestamp': '2025-10-01 04:28:51.398072', 'step': 12873, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:51.428950', 'step': 12873, 'epoch': 2} {'type': 'loss', 'content': 0.06449485570192337, 'timestamp': '2025-10-01 04:28:51.431460', 'step': 12874, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:51.463159', 'step': 12874, 'epoch': 2} {'type': 'loss', 'content': 0.10778418183326721, 'timestamp': '2025-10-01 04:28:51.465416', 'step': 12875, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:51.495884', 'step': 12875, 'epoch': 2} {'type': 'loss', 'content': 0.17538824677467346, 'timestamp': '2025-10-01 04:28:51.519474', 'step': 12876, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:51.549521', 'step': 12876, 'epoch': 2} {'type': 'loss', 'content': 0.09672325104475021, 'timestamp': '2025-10-01 04:28:51.551680', 'step': 12877, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:28:51.587116', 'step': 12877, 'epoch': 2} {'type': 'loss', 'content': 0.13441675901412964, 'timestamp': '2025-10-01 04:28:51.589314', 'step': 12878, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:51.620021', 'step': 12878, 'epoch': 2} {'type': 'loss', 'content': 0.05659620091319084, 'timestamp': '2025-10-01 04:28:51.623128', 'step': 12879, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:51.654098', 'step': 12879, 'epoch': 2} {'type': 'loss', 'content': 0.03882575407624245, 'timestamp': '2025-10-01 04:28:51.678035', 'step': 12880, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:51.708806', 'step': 12880, 'epoch': 2} {'type': 'loss', 'content': 0.06860645860433578, 'timestamp': '2025-10-01 04:28:51.711131', 'step': 12881, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:51.742175', 'step': 12881, 'epoch': 2} {'type': 'loss', 'content': 0.1337433159351349, 'timestamp': '2025-10-01 04:28:51.744424', 'step': 12882, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:51.776555', 'step': 12882, 'epoch': 2} {'type': 'loss', 'content': 0.03525657579302788, 'timestamp': '2025-10-01 04:28:51.778791', 'step': 12883, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:51.810716', 'step': 12883, 'epoch': 2} {'type': 'loss', 'content': 0.12681500613689423, 'timestamp': '2025-10-01 04:28:51.834354', 'step': 12884, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:28:51.874966', 'step': 12884, 'epoch': 2} {'type': 'loss', 'content': 0.10209387540817261, 'timestamp': '2025-10-01 04:28:51.876845', 'step': 12885, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:51.907720', 'step': 12885, 'epoch': 2} {'type': 'loss', 'content': 0.06503573060035706, 'timestamp': '2025-10-01 04:28:51.909933', 'step': 12886, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:51.940630', 'step': 12886, 'epoch': 2} {'type': 'loss', 'content': 0.07077975571155548, 'timestamp': '2025-10-01 04:28:51.947084', 'step': 12887, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:51.984668', 'step': 12887, 'epoch': 2} {'type': 'loss', 'content': 0.17135018110275269, 'timestamp': '2025-10-01 04:28:52.008688', 'step': 12888, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:52.039735', 'step': 12888, 'epoch': 2} {'type': 'loss', 'content': 0.06094515323638916, 'timestamp': '2025-10-01 04:28:52.041997', 'step': 12889, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:52.082227', 'step': 12889, 'epoch': 2} {'type': 'loss', 'content': 0.061016906052827835, 'timestamp': '2025-10-01 04:28:52.084564', 'step': 12890, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:52.115412', 'step': 12890, 'epoch': 2} {'type': 'loss', 'content': 0.08888305723667145, 'timestamp': '2025-10-01 04:28:52.117871', 'step': 12891, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:52.159440', 'step': 12891, 'epoch': 2} {'type': 'loss', 'content': 0.09643930941820145, 'timestamp': '2025-10-01 04:28:52.183348', 'step': 12892, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:52.215817', 'step': 12892, 'epoch': 2} {'type': 'loss', 'content': 0.1200137585401535, 'timestamp': '2025-10-01 04:28:52.218747', 'step': 12893, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:52.250528', 'step': 12893, 'epoch': 2} {'type': 'loss', 'content': 0.11255969852209091, 'timestamp': '2025-10-01 04:28:52.252624', 'step': 12894, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:52.283844', 'step': 12894, 'epoch': 2} {'type': 'loss', 'content': 0.11670169979333878, 'timestamp': '2025-10-01 04:28:52.286027', 'step': 12895, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:52.347401', 'step': 12895, 'epoch': 2} {'type': 'loss', 'content': 0.0897441878914833, 'timestamp': '2025-10-01 04:28:52.371135', 'step': 12896, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:52.402546', 'step': 12896, 'epoch': 2} {'type': 'loss', 'content': 0.06942519545555115, 'timestamp': '2025-10-01 04:28:52.411884', 'step': 12897, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:52.450797', 'step': 12897, 'epoch': 2} {'type': 'loss', 'content': 0.11060317605733871, 'timestamp': '2025-10-01 04:28:52.452898', 'step': 12898, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:52.483682', 'step': 12898, 'epoch': 2} {'type': 'loss', 'content': 0.12668563425540924, 'timestamp': '2025-10-01 04:28:52.485929', 'step': 12899, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:52.516501', 'step': 12899, 'epoch': 2} {'type': 'loss', 'content': 0.08593666553497314, 'timestamp': '2025-10-01 04:28:52.540215', 'step': 12900, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:52.570963', 'step': 12900, 'epoch': 2} {'type': 'loss', 'content': 0.1379024237394333, 'timestamp': '2025-10-01 04:28:52.573550', 'step': 12901, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:52.604193', 'step': 12901, 'epoch': 2} {'type': 'loss', 'content': 0.08084089308977127, 'timestamp': '2025-10-01 04:28:52.606178', 'step': 12902, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:52.636697', 'step': 12902, 'epoch': 2} {'type': 'loss', 'content': 0.11173161119222641, 'timestamp': '2025-10-01 04:28:52.638974', 'step': 12903, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:52.669624', 'step': 12903, 'epoch': 2} {'type': 'loss', 'content': 0.08391225337982178, 'timestamp': '2025-10-01 04:28:52.693137', 'step': 12904, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:52.723236', 'step': 12904, 'epoch': 2} {'type': 'loss', 'content': 0.07437064498662949, 'timestamp': '2025-10-01 04:28:52.725860', 'step': 12905, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:52.756383', 'step': 12905, 'epoch': 2} {'type': 'loss', 'content': 0.11769117414951324, 'timestamp': '2025-10-01 04:28:52.758568', 'step': 12906, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:52.788275', 'step': 12906, 'epoch': 2} {'type': 'loss', 'content': 0.051955707371234894, 'timestamp': '2025-10-01 04:28:52.790319', 'step': 12907, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:52.821531', 'step': 12907, 'epoch': 2} {'type': 'loss', 'content': 0.045281752943992615, 'timestamp': '2025-10-01 04:28:52.845118', 'step': 12908, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:52.878085', 'step': 12908, 'epoch': 2} {'type': 'loss', 'content': 0.10387597233057022, 'timestamp': '2025-10-01 04:28:52.880746', 'step': 12909, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:28:52.943934', 'step': 12909, 'epoch': 2} {'type': 'loss', 'content': 0.09030432999134064, 'timestamp': '2025-10-01 04:28:52.947069', 'step': 12910, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:52.990287', 'step': 12910, 'epoch': 2} {'type': 'loss', 'content': 0.15797364711761475, 'timestamp': '2025-10-01 04:28:52.995525', 'step': 12911, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:53.029891', 'step': 12911, 'epoch': 2} {'type': 'loss', 'content': 0.07536131888628006, 'timestamp': '2025-10-01 04:28:53.053410', 'step': 12912, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:28:53.091804', 'step': 12912, 'epoch': 2} {'type': 'loss', 'content': 0.08970142900943756, 'timestamp': '2025-10-01 04:28:53.093875', 'step': 12913, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:53.131648', 'step': 12913, 'epoch': 2} {'type': 'loss', 'content': 0.15475407242774963, 'timestamp': '2025-10-01 04:28:53.133881', 'step': 12914, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:53.168886', 'step': 12914, 'epoch': 2} {'type': 'loss', 'content': 0.08609951287508011, 'timestamp': '2025-10-01 04:28:53.171005', 'step': 12915, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:53.208502', 'step': 12915, 'epoch': 2} {'type': 'loss', 'content': 0.07785128802061081, 'timestamp': '2025-10-01 04:28:53.232397', 'step': 12916, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:53.263699', 'step': 12916, 'epoch': 2} {'type': 'loss', 'content': 0.1120125874876976, 'timestamp': '2025-10-01 04:28:53.265852', 'step': 12917, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:53.297592', 'step': 12917, 'epoch': 2} {'type': 'loss', 'content': 0.17641565203666687, 'timestamp': '2025-10-01 04:28:53.299580', 'step': 12918, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:53.345141', 'step': 12918, 'epoch': 2} {'type': 'loss', 'content': 0.07766179740428925, 'timestamp': '2025-10-01 04:28:53.347375', 'step': 12919, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:53.384693', 'step': 12919, 'epoch': 2} {'type': 'loss', 'content': 0.1254676878452301, 'timestamp': '2025-10-01 04:28:53.409020', 'step': 12920, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:28:53.448015', 'step': 12920, 'epoch': 2} {'type': 'loss', 'content': 0.05867461487650871, 'timestamp': '2025-10-01 04:28:53.451252', 'step': 12921, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:53.492055', 'step': 12921, 'epoch': 2} {'type': 'loss', 'content': 0.08102300763130188, 'timestamp': '2025-10-01 04:28:53.494803', 'step': 12922, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:53.532446', 'step': 12922, 'epoch': 2} {'type': 'loss', 'content': 0.08440816402435303, 'timestamp': '2025-10-01 04:28:53.534524', 'step': 12923, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:53.567415', 'step': 12923, 'epoch': 2} {'type': 'loss', 'content': 0.035013243556022644, 'timestamp': '2025-10-01 04:28:53.590996', 'step': 12924, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:53.622439', 'step': 12924, 'epoch': 2} {'type': 'loss', 'content': 0.0444776713848114, 'timestamp': '2025-10-01 04:28:53.624509', 'step': 12925, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:53.668720', 'step': 12925, 'epoch': 2} {'type': 'loss', 'content': 0.1448744386434555, 'timestamp': '2025-10-01 04:28:53.671022', 'step': 12926, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:53.702365', 'step': 12926, 'epoch': 2} {'type': 'loss', 'content': 0.10005220025777817, 'timestamp': '2025-10-01 04:28:53.704917', 'step': 12927, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:53.741610', 'step': 12927, 'epoch': 2} {'type': 'loss', 'content': 0.14205977320671082, 'timestamp': '2025-10-01 04:28:53.765241', 'step': 12928, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:28:53.801165', 'step': 12928, 'epoch': 2} {'type': 'loss', 'content': 0.04758533090353012, 'timestamp': '2025-10-01 04:28:53.805352', 'step': 12929, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:53.841615', 'step': 12929, 'epoch': 2} {'type': 'loss', 'content': 0.17856355011463165, 'timestamp': '2025-10-01 04:28:53.843878', 'step': 12930, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:53.877484', 'step': 12930, 'epoch': 2} {'type': 'loss', 'content': 0.12021178752183914, 'timestamp': '2025-10-01 04:28:53.879820', 'step': 12931, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:53.915092', 'step': 12931, 'epoch': 2} {'type': 'loss', 'content': 0.10482107102870941, 'timestamp': '2025-10-01 04:28:53.938668', 'step': 12932, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:53.992695', 'step': 12932, 'epoch': 2} {'type': 'loss', 'content': 0.10404130816459656, 'timestamp': '2025-10-01 04:28:53.994741', 'step': 12933, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:54.042306', 'step': 12933, 'epoch': 2} {'type': 'loss', 'content': 0.14217834174633026, 'timestamp': '2025-10-01 04:28:54.044469', 'step': 12934, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:54.097125', 'step': 12934, 'epoch': 2} {'type': 'loss', 'content': 0.10371856391429901, 'timestamp': '2025-10-01 04:28:54.103015', 'step': 12935, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:54.137477', 'step': 12935, 'epoch': 2} {'type': 'loss', 'content': 0.08907493948936462, 'timestamp': '2025-10-01 04:28:54.161109', 'step': 12936, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:54.197263', 'step': 12936, 'epoch': 2} {'type': 'loss', 'content': 0.15936267375946045, 'timestamp': '2025-10-01 04:28:54.199896', 'step': 12937, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:54.245983', 'step': 12937, 'epoch': 2} {'type': 'loss', 'content': 0.12406504899263382, 'timestamp': '2025-10-01 04:28:54.248378', 'step': 12938, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:54.284061', 'step': 12938, 'epoch': 2} {'type': 'loss', 'content': 0.10915825515985489, 'timestamp': '2025-10-01 04:28:54.286253', 'step': 12939, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:54.350613', 'step': 12939, 'epoch': 2} {'type': 'loss', 'content': 0.09328669309616089, 'timestamp': '2025-10-01 04:28:54.374211', 'step': 12940, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:54.413103', 'step': 12940, 'epoch': 2} {'type': 'loss', 'content': 0.1783469170331955, 'timestamp': '2025-10-01 04:28:54.415176', 'step': 12941, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:54.469338', 'step': 12941, 'epoch': 2} {'type': 'loss', 'content': 0.04127585515379906, 'timestamp': '2025-10-01 04:28:54.471495', 'step': 12942, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:54.510252', 'step': 12942, 'epoch': 2} {'type': 'loss', 'content': 0.07865667343139648, 'timestamp': '2025-10-01 04:28:54.512477', 'step': 12943, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:54.551626', 'step': 12943, 'epoch': 2} {'type': 'loss', 'content': 0.12496137619018555, 'timestamp': '2025-10-01 04:28:54.575189', 'step': 12944, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:54.618498', 'step': 12944, 'epoch': 2} {'type': 'loss', 'content': 0.08249513804912567, 'timestamp': '2025-10-01 04:28:54.620868', 'step': 12945, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:54.669800', 'step': 12945, 'epoch': 2} {'type': 'loss', 'content': 0.1686835139989853, 'timestamp': '2025-10-01 04:28:54.672031', 'step': 12946, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:54.705277', 'step': 12946, 'epoch': 2} {'type': 'loss', 'content': 0.06683257222175598, 'timestamp': '2025-10-01 04:28:54.708502', 'step': 12947, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:54.750352', 'step': 12947, 'epoch': 2} {'type': 'loss', 'content': 0.08805117756128311, 'timestamp': '2025-10-01 04:28:54.773997', 'step': 12948, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:54.815809', 'step': 12948, 'epoch': 2} {'type': 'loss', 'content': 0.08191586285829544, 'timestamp': '2025-10-01 04:28:54.817941', 'step': 12949, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:54.861026', 'step': 12949, 'epoch': 2} {'type': 'loss', 'content': 0.11182617396116257, 'timestamp': '2025-10-01 04:28:54.863042', 'step': 12950, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:54.897682', 'step': 12950, 'epoch': 2} {'type': 'loss', 'content': 0.044907912611961365, 'timestamp': '2025-10-01 04:28:54.899934', 'step': 12951, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:54.941824', 'step': 12951, 'epoch': 2} {'type': 'loss', 'content': 0.07471928000450134, 'timestamp': '2025-10-01 04:28:54.965488', 'step': 12952, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:55.001946', 'step': 12952, 'epoch': 2} {'type': 'loss', 'content': 0.08833037316799164, 'timestamp': '2025-10-01 04:28:55.004123', 'step': 12953, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:28:55.059974', 'step': 12953, 'epoch': 2} {'type': 'loss', 'content': 0.07576047629117966, 'timestamp': '2025-10-01 04:28:55.062243', 'step': 12954, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:55.107436', 'step': 12954, 'epoch': 2} {'type': 'loss', 'content': 0.14938311278820038, 'timestamp': '2025-10-01 04:28:55.109782', 'step': 12955, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:55.161331', 'step': 12955, 'epoch': 2} {'type': 'loss', 'content': 0.21667155623435974, 'timestamp': '2025-10-01 04:28:55.184929', 'step': 12956, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:55.230042', 'step': 12956, 'epoch': 2} {'type': 'loss', 'content': 0.08538307249546051, 'timestamp': '2025-10-01 04:28:55.232204', 'step': 12957, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:55.274083', 'step': 12957, 'epoch': 2} {'type': 'loss', 'content': 0.04866364225745201, 'timestamp': '2025-10-01 04:28:55.276264', 'step': 12958, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:55.323701', 'step': 12958, 'epoch': 2} {'type': 'loss', 'content': 0.1088530644774437, 'timestamp': '2025-10-01 04:28:55.325949', 'step': 12959, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:28:55.372910', 'step': 12959, 'epoch': 2} {'type': 'loss', 'content': 0.17485535144805908, 'timestamp': '2025-10-01 04:28:55.396842', 'step': 12960, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:55.436240', 'step': 12960, 'epoch': 2} {'type': 'loss', 'content': 0.07830050587654114, 'timestamp': '2025-10-01 04:28:55.439937', 'step': 12961, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:55.473601', 'step': 12961, 'epoch': 2} {'type': 'loss', 'content': 0.12746454775333405, 'timestamp': '2025-10-01 04:28:55.481853', 'step': 12962, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:55.516188', 'step': 12962, 'epoch': 2} {'type': 'loss', 'content': 0.011352203786373138, 'timestamp': '2025-10-01 04:28:55.524065', 'step': 12963, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:55.556816', 'step': 12963, 'epoch': 2} {'type': 'loss', 'content': 0.10939288139343262, 'timestamp': '2025-10-01 04:28:55.580601', 'step': 12964, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:28:55.613234', 'step': 12964, 'epoch': 2} {'type': 'loss', 'content': 0.08741037547588348, 'timestamp': '2025-10-01 04:28:55.615409', 'step': 12965, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:28:55.650228', 'step': 12965, 'epoch': 2} {'type': 'loss', 'content': 0.13630947470664978, 'timestamp': '2025-10-01 04:28:55.654638', 'step': 12966, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:55.689821', 'step': 12966, 'epoch': 2} {'type': 'loss', 'content': 0.159628227353096, 'timestamp': '2025-10-01 04:28:55.691900', 'step': 12967, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:55.726368', 'step': 12967, 'epoch': 2} {'type': 'loss', 'content': 0.14514200389385223, 'timestamp': '2025-10-01 04:28:55.749973', 'step': 12968, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:55.783581', 'step': 12968, 'epoch': 2} {'type': 'loss', 'content': 0.11941518634557724, 'timestamp': '2025-10-01 04:28:55.785738', 'step': 12969, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:55.826954', 'step': 12969, 'epoch': 2} {'type': 'loss', 'content': 0.08369921147823334, 'timestamp': '2025-10-01 04:28:55.829133', 'step': 12970, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:55.864051', 'step': 12970, 'epoch': 2} {'type': 'loss', 'content': 0.10200299322605133, 'timestamp': '2025-10-01 04:28:55.866135', 'step': 12971, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:55.899346', 'step': 12971, 'epoch': 2} {'type': 'loss', 'content': 0.05126649886369705, 'timestamp': '2025-10-01 04:28:55.922993', 'step': 12972, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:28:55.956284', 'step': 12972, 'epoch': 2} {'type': 'loss', 'content': 0.09348773211240768, 'timestamp': '2025-10-01 04:28:55.958340', 'step': 12973, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:55.992737', 'step': 12973, 'epoch': 2} {'type': 'loss', 'content': 0.07910066097974777, 'timestamp': '2025-10-01 04:28:55.994990', 'step': 12974, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:28:56.038382', 'step': 12974, 'epoch': 2} {'type': 'loss', 'content': 0.10277039557695389, 'timestamp': '2025-10-01 04:28:56.040942', 'step': 12975, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:56.074691', 'step': 12975, 'epoch': 2} {'type': 'loss', 'content': 0.06542368233203888, 'timestamp': '2025-10-01 04:28:56.098413', 'step': 12976, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:28:56.137682', 'step': 12976, 'epoch': 2} {'type': 'loss', 'content': 0.08010059595108032, 'timestamp': '2025-10-01 04:28:56.139654', 'step': 12977, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:56.174082', 'step': 12977, 'epoch': 2} {'type': 'loss', 'content': 0.14522521197795868, 'timestamp': '2025-10-01 04:28:56.176190', 'step': 12978, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:56.208465', 'step': 12978, 'epoch': 2} {'type': 'loss', 'content': 0.09498932212591171, 'timestamp': '2025-10-01 04:28:56.210774', 'step': 12979, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:56.255533', 'step': 12979, 'epoch': 2} {'type': 'loss', 'content': 0.07392674684524536, 'timestamp': '2025-10-01 04:28:56.278998', 'step': 12980, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:56.313232', 'step': 12980, 'epoch': 2} {'type': 'loss', 'content': 0.08444531261920929, 'timestamp': '2025-10-01 04:28:56.315312', 'step': 12981, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:56.359470', 'step': 12981, 'epoch': 2} {'type': 'loss', 'content': 0.06953628361225128, 'timestamp': '2025-10-01 04:28:56.361603', 'step': 12982, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:28:56.394684', 'step': 12982, 'epoch': 2} {'type': 'loss', 'content': 0.05495977774262428, 'timestamp': '2025-10-01 04:28:56.398213', 'step': 12983, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:56.460545', 'step': 12983, 'epoch': 2} {'type': 'loss', 'content': 0.08086805790662766, 'timestamp': '2025-10-01 04:28:56.484765', 'step': 12984, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:56.520503', 'step': 12984, 'epoch': 2} {'type': 'loss', 'content': 0.04228406772017479, 'timestamp': '2025-10-01 04:28:56.523058', 'step': 12985, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-10-01 04:28:56.566642', 'step': 12985, 'epoch': 2} {'type': 'loss', 'content': 0.16874000430107117, 'timestamp': '2025-10-01 04:28:56.573616', 'step': 12986, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:28:56.606686', 'step': 12986, 'epoch': 2} {'type': 'loss', 'content': 0.08991411328315735, 'timestamp': '2025-10-01 04:28:56.610981', 'step': 12987, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:28:56.645361', 'step': 12987, 'epoch': 2} {'type': 'loss', 'content': 0.10735580325126648, 'timestamp': '2025-10-01 04:28:56.675688', 'step': 12988, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:56.736537', 'step': 12988, 'epoch': 2} {'type': 'loss', 'content': 0.051153890788555145, 'timestamp': '2025-10-01 04:28:56.738802', 'step': 12989, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:56.783106', 'step': 12989, 'epoch': 2} {'type': 'loss', 'content': 0.03822407126426697, 'timestamp': '2025-10-01 04:28:56.785663', 'step': 12990, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:56.830851', 'step': 12990, 'epoch': 2} {'type': 'loss', 'content': 0.11587724089622498, 'timestamp': '2025-10-01 04:28:56.833007', 'step': 12991, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:56.868625', 'step': 12991, 'epoch': 2} {'type': 'loss', 'content': 0.1597512662410736, 'timestamp': '2025-10-01 04:28:56.892285', 'step': 12992, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:56.925443', 'step': 12992, 'epoch': 2} {'type': 'loss', 'content': 0.0944143608212471, 'timestamp': '2025-10-01 04:28:56.927634', 'step': 12993, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:56.979136', 'step': 12993, 'epoch': 2} {'type': 'loss', 'content': 0.15991944074630737, 'timestamp': '2025-10-01 04:28:56.981270', 'step': 12994, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-10-01 04:28:57.027168', 'step': 12994, 'epoch': 2} {'type': 'loss', 'content': 0.11469251662492752, 'timestamp': '2025-10-01 04:28:57.031778', 'step': 12995, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:57.064541', 'step': 12995, 'epoch': 2} {'type': 'loss', 'content': 0.14388170838356018, 'timestamp': '2025-10-01 04:28:57.088208', 'step': 12996, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:28:57.121461', 'step': 12996, 'epoch': 2} {'type': 'loss', 'content': 0.06520309299230576, 'timestamp': '2025-10-01 04:28:57.123566', 'step': 12997, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:28:57.166267', 'step': 12997, 'epoch': 2} {'type': 'loss', 'content': 0.13361422717571259, 'timestamp': '2025-10-01 04:28:57.168779', 'step': 12998, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:28:57.210284', 'step': 12998, 'epoch': 2} {'type': 'loss', 'content': 0.1624578833580017, 'timestamp': '2025-10-01 04:28:57.213084', 'step': 12999, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:28:57.254547', 'step': 12999, 'epoch': 2} {'type': 'loss', 'content': 0.12034203112125397, 'timestamp': '2025-10-01 04:28:57.278171', 'step': 13000, 'epoch': 2} {'type': 'info', 'content': 'Checkpoint saved at step 13000', 'timestamp': '2025-10-01 04:29:02.525235', 'step': 13000, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:29:02.563869', 'step': 13000, 'epoch': 2} {'type': 'loss', 'content': 0.04605407267808914, 'timestamp': '2025-10-01 04:29:02.565912', 'step': 13001, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:02.600811', 'step': 13001, 'epoch': 2} {'type': 'loss', 'content': 0.092305026948452, 'timestamp': '2025-10-01 04:29:02.602978', 'step': 13002, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:29:02.636237', 'step': 13002, 'epoch': 2} {'type': 'loss', 'content': 0.0816706195473671, 'timestamp': '2025-10-01 04:29:02.638518', 'step': 13003, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:02.682428', 'step': 13003, 'epoch': 2} {'type': 'loss', 'content': 0.10372807830572128, 'timestamp': '2025-10-01 04:29:02.706131', 'step': 13004, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:29:02.739377', 'step': 13004, 'epoch': 2} {'type': 'loss', 'content': 0.06665101647377014, 'timestamp': '2025-10-01 04:29:02.741417', 'step': 13005, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:02.775507', 'step': 13005, 'epoch': 2} {'type': 'loss', 'content': 0.11394555866718292, 'timestamp': '2025-10-01 04:29:02.777605', 'step': 13006, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:02.810201', 'step': 13006, 'epoch': 2} {'type': 'loss', 'content': 0.06046124920248985, 'timestamp': '2025-10-01 04:29:02.812242', 'step': 13007, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:02.848301', 'step': 13007, 'epoch': 2} {'type': 'loss', 'content': 0.14674681425094604, 'timestamp': '2025-10-01 04:29:02.871893', 'step': 13008, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:29:02.907800', 'step': 13008, 'epoch': 2} {'type': 'loss', 'content': 0.1403343677520752, 'timestamp': '2025-10-01 04:29:02.909957', 'step': 13009, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:02.950918', 'step': 13009, 'epoch': 2} {'type': 'loss', 'content': 0.07549858093261719, 'timestamp': '2025-10-01 04:29:02.953156', 'step': 13010, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:02.986990', 'step': 13010, 'epoch': 2} {'type': 'loss', 'content': 0.2045612782239914, 'timestamp': '2025-10-01 04:29:02.989102', 'step': 13011, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:03.022526', 'step': 13011, 'epoch': 2} {'type': 'loss', 'content': 0.18465664982795715, 'timestamp': '2025-10-01 04:29:03.046138', 'step': 13012, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:03.080307', 'step': 13012, 'epoch': 2} {'type': 'loss', 'content': 0.22335174679756165, 'timestamp': '2025-10-01 04:29:03.082456', 'step': 13013, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:03.117047', 'step': 13013, 'epoch': 2} {'type': 'loss', 'content': 0.15572181344032288, 'timestamp': '2025-10-01 04:29:03.119500', 'step': 13014, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:29:03.160112', 'step': 13014, 'epoch': 2} {'type': 'loss', 'content': 0.07353811711072922, 'timestamp': '2025-10-01 04:29:03.162228', 'step': 13015, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:29:03.195927', 'step': 13015, 'epoch': 2} {'type': 'loss', 'content': 0.07814473658800125, 'timestamp': '2025-10-01 04:29:03.219683', 'step': 13016, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:03.255558', 'step': 13016, 'epoch': 2} {'type': 'loss', 'content': 0.11284122616052628, 'timestamp': '2025-10-01 04:29:03.257797', 'step': 13017, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:29:03.292247', 'step': 13017, 'epoch': 2} {'type': 'loss', 'content': 0.08930707722902298, 'timestamp': '2025-10-01 04:29:03.294768', 'step': 13018, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:29:03.344644', 'step': 13018, 'epoch': 2} {'type': 'loss', 'content': 0.07970669120550156, 'timestamp': '2025-10-01 04:29:03.352164', 'step': 13019, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:03.397065', 'step': 13019, 'epoch': 2} {'type': 'loss', 'content': 0.07084666937589645, 'timestamp': '2025-10-01 04:29:03.420658', 'step': 13020, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:29:03.469447', 'step': 13020, 'epoch': 2} {'type': 'loss', 'content': 0.08544950187206268, 'timestamp': '2025-10-01 04:29:03.471631', 'step': 13021, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:03.508727', 'step': 13021, 'epoch': 2} {'type': 'loss', 'content': 0.06504836678504944, 'timestamp': '2025-10-01 04:29:03.511159', 'step': 13022, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:29:03.552097', 'step': 13022, 'epoch': 2} {'type': 'loss', 'content': 0.10586423426866531, 'timestamp': '2025-10-01 04:29:03.556412', 'step': 13023, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-10-01 04:29:03.594213', 'step': 13023, 'epoch': 2} {'type': 'loss', 'content': 0.10127980262041092, 'timestamp': '2025-10-01 04:29:03.619983', 'step': 13024, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:29:03.658034', 'step': 13024, 'epoch': 2} {'type': 'loss', 'content': 0.1291400045156479, 'timestamp': '2025-10-01 04:29:03.660205', 'step': 13025, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:03.705154', 'step': 13025, 'epoch': 2} {'type': 'loss', 'content': 0.2481791228055954, 'timestamp': '2025-10-01 04:29:03.707285', 'step': 13026, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:03.747294', 'step': 13026, 'epoch': 2} {'type': 'loss', 'content': 0.07190380990505219, 'timestamp': '2025-10-01 04:29:03.750218', 'step': 13027, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:29:03.792215', 'step': 13027, 'epoch': 2} {'type': 'loss', 'content': 0.09978099912405014, 'timestamp': '2025-10-01 04:29:03.815888', 'step': 13028, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:03.853763', 'step': 13028, 'epoch': 2} {'type': 'loss', 'content': 0.14010167121887207, 'timestamp': '2025-10-01 04:29:03.856009', 'step': 13029, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:29:03.892542', 'step': 13029, 'epoch': 2} {'type': 'loss', 'content': 0.1070856973528862, 'timestamp': '2025-10-01 04:29:03.896785', 'step': 13030, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:29:03.934097', 'step': 13030, 'epoch': 2} {'type': 'loss', 'content': 0.0691162496805191, 'timestamp': '2025-10-01 04:29:03.938242', 'step': 13031, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:29:03.969886', 'step': 13031, 'epoch': 2} {'type': 'loss', 'content': 0.12629885971546173, 'timestamp': '2025-10-01 04:29:03.993833', 'step': 13032, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:04.027193', 'step': 13032, 'epoch': 2} {'type': 'loss', 'content': 0.14946302771568298, 'timestamp': '2025-10-01 04:29:04.029205', 'step': 13033, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:04.073682', 'step': 13033, 'epoch': 2} {'type': 'loss', 'content': 0.07366243749856949, 'timestamp': '2025-10-01 04:29:04.075556', 'step': 13034, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:29:04.111666', 'step': 13034, 'epoch': 2} {'type': 'loss', 'content': 0.06343792378902435, 'timestamp': '2025-10-01 04:29:04.115987', 'step': 13035, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:29:04.152029', 'step': 13035, 'epoch': 2} {'type': 'loss', 'content': 0.02655959315598011, 'timestamp': '2025-10-01 04:29:04.175590', 'step': 13036, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:29:04.218942', 'step': 13036, 'epoch': 2} {'type': 'loss', 'content': 0.13157697021961212, 'timestamp': '2025-10-01 04:29:04.221044', 'step': 13037, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:04.255269', 'step': 13037, 'epoch': 2} {'type': 'loss', 'content': 0.07398135960102081, 'timestamp': '2025-10-01 04:29:04.257396', 'step': 13038, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:29:04.300576', 'step': 13038, 'epoch': 2} {'type': 'loss', 'content': 0.10668014734983444, 'timestamp': '2025-10-01 04:29:04.302746', 'step': 13039, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:29:04.343612', 'step': 13039, 'epoch': 2} {'type': 'loss', 'content': 0.15319766104221344, 'timestamp': '2025-10-01 04:29:04.369189', 'step': 13040, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:04.402107', 'step': 13040, 'epoch': 2} {'type': 'loss', 'content': 0.07433497905731201, 'timestamp': '2025-10-01 04:29:04.408154', 'step': 13041, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:04.445156', 'step': 13041, 'epoch': 2} {'type': 'loss', 'content': 0.10020382702350616, 'timestamp': '2025-10-01 04:29:04.447490', 'step': 13042, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:04.488987', 'step': 13042, 'epoch': 2} {'type': 'loss', 'content': 0.0874866172671318, 'timestamp': '2025-10-01 04:29:04.491296', 'step': 13043, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:04.522647', 'step': 13043, 'epoch': 2} {'type': 'loss', 'content': 0.08703871816396713, 'timestamp': '2025-10-01 04:29:04.546249', 'step': 13044, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:29:04.593653', 'step': 13044, 'epoch': 2} {'type': 'loss', 'content': 0.1733647584915161, 'timestamp': '2025-10-01 04:29:04.597511', 'step': 13045, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:04.638264', 'step': 13045, 'epoch': 2} {'type': 'loss', 'content': 0.03362252935767174, 'timestamp': '2025-10-01 04:29:04.640474', 'step': 13046, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:29:04.675616', 'step': 13046, 'epoch': 2} {'type': 'loss', 'content': 0.06201952323317528, 'timestamp': '2025-10-01 04:29:04.677745', 'step': 13047, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:04.717514', 'step': 13047, 'epoch': 2} {'type': 'loss', 'content': 0.06596903502941132, 'timestamp': '2025-10-01 04:29:04.740930', 'step': 13048, 'epoch': 2} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 949202279808}], 'timestamp': '2025-10-01 04:29:15.884962', 'step': 13048, 'epoch': 2} {'type': 'pplx', 'content': 9030.72280594041, 'timestamp': '2025-10-01 04:29:15.887837', 'step': 13048, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:15.917794', 'step': 13048, 'epoch': 2} {'type': 'loss', 'content': 0.10730930417776108, 'timestamp': '2025-10-01 04:29:15.919617', 'step': 13049, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:15.958681', 'step': 13049, 'epoch': 2} {'type': 'loss', 'content': 0.07667375355958939, 'timestamp': '2025-10-01 04:29:15.960398', 'step': 13050, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:16.000070', 'step': 13050, 'epoch': 2} {'type': 'loss', 'content': 0.12863297760486603, 'timestamp': '2025-10-01 04:29:16.002106', 'step': 13051, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:29:16.043018', 'step': 13051, 'epoch': 2} {'type': 'loss', 'content': 0.05958395078778267, 'timestamp': '2025-10-01 04:29:16.066685', 'step': 13052, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:16.098954', 'step': 13052, 'epoch': 2} {'type': 'loss', 'content': 0.09894334524869919, 'timestamp': '2025-10-01 04:29:16.101519', 'step': 13053, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:16.134217', 'step': 13053, 'epoch': 2} {'type': 'loss', 'content': 0.10057667642831802, 'timestamp': '2025-10-01 04:29:16.136636', 'step': 13054, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:16.176299', 'step': 13054, 'epoch': 2} {'type': 'loss', 'content': 0.08510853350162506, 'timestamp': '2025-10-01 04:29:16.178579', 'step': 13055, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:29:16.214570', 'step': 13055, 'epoch': 2} {'type': 'loss', 'content': 0.08019328862428665, 'timestamp': '2025-10-01 04:29:16.238018', 'step': 13056, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:16.270766', 'step': 13056, 'epoch': 2} {'type': 'loss', 'content': 0.20494191348552704, 'timestamp': '2025-10-01 04:29:16.272766', 'step': 13057, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:16.304648', 'step': 13057, 'epoch': 2} {'type': 'loss', 'content': 0.07482118904590607, 'timestamp': '2025-10-01 04:29:16.306457', 'step': 13058, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:16.342246', 'step': 13058, 'epoch': 2} {'type': 'loss', 'content': 0.13081134855747223, 'timestamp': '2025-10-01 04:29:16.351706', 'step': 13059, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:29:16.387937', 'step': 13059, 'epoch': 2} {'type': 'loss', 'content': 0.07152460515499115, 'timestamp': '2025-10-01 04:29:16.417895', 'step': 13060, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:16.453518', 'step': 13060, 'epoch': 2} {'type': 'loss', 'content': 0.1139233410358429, 'timestamp': '2025-10-01 04:29:16.455560', 'step': 13061, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:29:16.489631', 'step': 13061, 'epoch': 2} {'type': 'loss', 'content': 0.1238555908203125, 'timestamp': '2025-10-01 04:29:16.491855', 'step': 13062, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:16.524625', 'step': 13062, 'epoch': 2} {'type': 'loss', 'content': 0.1212439015507698, 'timestamp': '2025-10-01 04:29:16.526385', 'step': 13063, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:16.560606', 'step': 13063, 'epoch': 2} {'type': 'loss', 'content': 0.0829543024301529, 'timestamp': '2025-10-01 04:29:16.584210', 'step': 13064, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:16.619961', 'step': 13064, 'epoch': 2} {'type': 'loss', 'content': 0.11290405690670013, 'timestamp': '2025-10-01 04:29:16.621968', 'step': 13065, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:16.666837', 'step': 13065, 'epoch': 2} {'type': 'loss', 'content': 0.04004492983222008, 'timestamp': '2025-10-01 04:29:16.669460', 'step': 13066, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:29:16.706891', 'step': 13066, 'epoch': 2} {'type': 'loss', 'content': 0.10741595178842545, 'timestamp': '2025-10-01 04:29:16.710007', 'step': 13067, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:29:16.749119', 'step': 13067, 'epoch': 2} {'type': 'loss', 'content': 0.11841800808906555, 'timestamp': '2025-10-01 04:29:16.773248', 'step': 13068, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:16.810000', 'step': 13068, 'epoch': 2} {'type': 'loss', 'content': 0.0673392191529274, 'timestamp': '2025-10-01 04:29:16.812553', 'step': 13069, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:29:16.844789', 'step': 13069, 'epoch': 2} {'type': 'loss', 'content': 0.05674761161208153, 'timestamp': '2025-10-01 04:29:16.847275', 'step': 13070, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:16.879137', 'step': 13070, 'epoch': 2} {'type': 'loss', 'content': 0.05923432484269142, 'timestamp': '2025-10-01 04:29:16.881458', 'step': 13071, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:29:16.920907', 'step': 13071, 'epoch': 2} {'type': 'loss', 'content': 0.10616806894540787, 'timestamp': '2025-10-01 04:29:16.944491', 'step': 13072, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:29:16.976864', 'step': 13072, 'epoch': 2} {'type': 'loss', 'content': 0.1220930889248848, 'timestamp': '2025-10-01 04:29:16.978812', 'step': 13073, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:17.017329', 'step': 13073, 'epoch': 2} {'type': 'loss', 'content': 0.08903869241476059, 'timestamp': '2025-10-01 04:29:17.019732', 'step': 13074, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:17.060858', 'step': 13074, 'epoch': 2} {'type': 'loss', 'content': 0.11292929947376251, 'timestamp': '2025-10-01 04:29:17.063343', 'step': 13075, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:29:17.101117', 'step': 13075, 'epoch': 2} {'type': 'loss', 'content': 0.055662885308265686, 'timestamp': '2025-10-01 04:29:17.125450', 'step': 13076, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:17.161283', 'step': 13076, 'epoch': 2} {'type': 'loss', 'content': 0.09714636206626892, 'timestamp': '2025-10-01 04:29:17.163774', 'step': 13077, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:17.218445', 'step': 13077, 'epoch': 2} {'type': 'loss', 'content': 0.10751902312040329, 'timestamp': '2025-10-01 04:29:17.220780', 'step': 13078, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:29:17.260119', 'step': 13078, 'epoch': 2} {'type': 'loss', 'content': 0.07484658062458038, 'timestamp': '2025-10-01 04:29:17.262609', 'step': 13079, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:17.298631', 'step': 13079, 'epoch': 2} {'type': 'loss', 'content': 0.11064110696315765, 'timestamp': '2025-10-01 04:29:17.324400', 'step': 13080, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:17.372042', 'step': 13080, 'epoch': 2} {'type': 'loss', 'content': 0.17100656032562256, 'timestamp': '2025-10-01 04:29:17.374242', 'step': 13081, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:29:17.406435', 'step': 13081, 'epoch': 2} {'type': 'loss', 'content': 0.11094833165407181, 'timestamp': '2025-10-01 04:29:17.411054', 'step': 13082, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:29:17.442807', 'step': 13082, 'epoch': 2} {'type': 'loss', 'content': 0.09091395139694214, 'timestamp': '2025-10-01 04:29:17.445173', 'step': 13083, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:17.476824', 'step': 13083, 'epoch': 2} {'type': 'loss', 'content': 0.1233544647693634, 'timestamp': '2025-10-01 04:29:17.506345', 'step': 13084, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:17.544402', 'step': 13084, 'epoch': 2} {'type': 'loss', 'content': 0.13487249612808228, 'timestamp': '2025-10-01 04:29:17.547777', 'step': 13085, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:17.585583', 'step': 13085, 'epoch': 2} {'type': 'loss', 'content': 0.17791065573692322, 'timestamp': '2025-10-01 04:29:17.587941', 'step': 13086, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:17.627884', 'step': 13086, 'epoch': 2} {'type': 'loss', 'content': 0.23460134863853455, 'timestamp': '2025-10-01 04:29:17.632332', 'step': 13087, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:17.663770', 'step': 13087, 'epoch': 2} {'type': 'loss', 'content': 0.09795614331960678, 'timestamp': '2025-10-01 04:29:17.687323', 'step': 13088, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:17.722579', 'step': 13088, 'epoch': 2} {'type': 'loss', 'content': 0.08802340924739838, 'timestamp': '2025-10-01 04:29:17.726263', 'step': 13089, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:17.757535', 'step': 13089, 'epoch': 2} {'type': 'loss', 'content': 0.12952493131160736, 'timestamp': '2025-10-01 04:29:17.759558', 'step': 13090, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:29:17.795860', 'step': 13090, 'epoch': 2} {'type': 'loss', 'content': 0.12463068962097168, 'timestamp': '2025-10-01 04:29:17.806392', 'step': 13091, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:29:17.849127', 'step': 13091, 'epoch': 2} {'type': 'loss', 'content': 0.13897469639778137, 'timestamp': '2025-10-01 04:29:17.872872', 'step': 13092, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:29:17.911917', 'step': 13092, 'epoch': 2} {'type': 'loss', 'content': 0.1276249885559082, 'timestamp': '2025-10-01 04:29:17.914095', 'step': 13093, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:17.956545', 'step': 13093, 'epoch': 2} {'type': 'loss', 'content': 0.07918599247932434, 'timestamp': '2025-10-01 04:29:17.958846', 'step': 13094, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:29:18.006299', 'step': 13094, 'epoch': 2} {'type': 'loss', 'content': 0.1576864868402481, 'timestamp': '2025-10-01 04:29:18.008555', 'step': 13095, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:18.056687', 'step': 13095, 'epoch': 2} {'type': 'loss', 'content': 0.1072155237197876, 'timestamp': '2025-10-01 04:29:18.080543', 'step': 13096, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:18.129109', 'step': 13096, 'epoch': 2} {'type': 'loss', 'content': 0.08114288747310638, 'timestamp': '2025-10-01 04:29:18.131195', 'step': 13097, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:29:18.166687', 'step': 13097, 'epoch': 2} {'type': 'loss', 'content': 0.07835275679826736, 'timestamp': '2025-10-01 04:29:18.170954', 'step': 13098, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:29:18.212390', 'step': 13098, 'epoch': 2} {'type': 'loss', 'content': 0.12910376489162445, 'timestamp': '2025-10-01 04:29:18.219564', 'step': 13099, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:29:18.260146', 'step': 13099, 'epoch': 2} {'type': 'loss', 'content': 0.09902709722518921, 'timestamp': '2025-10-01 04:29:18.283412', 'step': 13100, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:29:18.326300', 'step': 13100, 'epoch': 2} {'type': 'loss', 'content': 0.07567945122718811, 'timestamp': '2025-10-01 04:29:18.327866', 'step': 13101, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:29:18.359381', 'step': 13101, 'epoch': 2} {'type': 'loss', 'content': 0.210165873169899, 'timestamp': '2025-10-01 04:29:18.361790', 'step': 13102, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:29:18.403568', 'step': 13102, 'epoch': 2} {'type': 'loss', 'content': 0.11609606444835663, 'timestamp': '2025-10-01 04:29:18.405455', 'step': 13103, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:29:18.437892', 'step': 13103, 'epoch': 2} {'type': 'loss', 'content': 0.1565755158662796, 'timestamp': '2025-10-01 04:29:18.461216', 'step': 13104, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:29:18.492784', 'step': 13104, 'epoch': 2} {'type': 'loss', 'content': 0.10203594714403152, 'timestamp': '2025-10-01 04:29:18.495051', 'step': 13105, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:29:18.527925', 'step': 13105, 'epoch': 2} {'type': 'loss', 'content': 0.12620259821414948, 'timestamp': '2025-10-01 04:29:18.530086', 'step': 13106, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:18.561192', 'step': 13106, 'epoch': 2} {'type': 'loss', 'content': 0.05364455655217171, 'timestamp': '2025-10-01 04:29:18.563267', 'step': 13107, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:29:18.594693', 'step': 13107, 'epoch': 2} {'type': 'loss', 'content': 0.12878192961215973, 'timestamp': '2025-10-01 04:29:18.619934', 'step': 13108, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:18.653876', 'step': 13108, 'epoch': 2} {'type': 'loss', 'content': 0.09532003849744797, 'timestamp': '2025-10-01 04:29:18.655760', 'step': 13109, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:18.686857', 'step': 13109, 'epoch': 2} {'type': 'loss', 'content': 0.10082074254751205, 'timestamp': '2025-10-01 04:29:18.688959', 'step': 13110, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:18.720686', 'step': 13110, 'epoch': 2} {'type': 'loss', 'content': 0.07608598470687866, 'timestamp': '2025-10-01 04:29:18.722851', 'step': 13111, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:29:18.753872', 'step': 13111, 'epoch': 2} {'type': 'loss', 'content': 0.02296116016805172, 'timestamp': '2025-10-01 04:29:18.781978', 'step': 13112, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:29:18.817948', 'step': 13112, 'epoch': 2} {'type': 'loss', 'content': 0.06388367712497711, 'timestamp': '2025-10-01 04:29:18.820082', 'step': 13113, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:29:18.852117', 'step': 13113, 'epoch': 2} {'type': 'loss', 'content': 0.12187454849481583, 'timestamp': '2025-10-01 04:29:18.854561', 'step': 13114, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:29:18.887358', 'step': 13114, 'epoch': 2} {'type': 'loss', 'content': 0.09748193621635437, 'timestamp': '2025-10-01 04:29:18.889199', 'step': 13115, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:18.921823', 'step': 13115, 'epoch': 2} {'type': 'loss', 'content': 0.16462579369544983, 'timestamp': '2025-10-01 04:29:18.945254', 'step': 13116, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:18.975929', 'step': 13116, 'epoch': 2} {'type': 'loss', 'content': 0.08409716188907623, 'timestamp': '2025-10-01 04:29:18.977796', 'step': 13117, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:29:19.008384', 'step': 13117, 'epoch': 2} {'type': 'loss', 'content': 0.1813865303993225, 'timestamp': '2025-10-01 04:29:19.011472', 'step': 13118, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:19.043289', 'step': 13118, 'epoch': 2} {'type': 'loss', 'content': 0.05433810502290726, 'timestamp': '2025-10-01 04:29:19.045472', 'step': 13119, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:19.078923', 'step': 13119, 'epoch': 2} {'type': 'loss', 'content': 0.08347928524017334, 'timestamp': '2025-10-01 04:29:19.102551', 'step': 13120, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:19.150793', 'step': 13120, 'epoch': 2} {'type': 'loss', 'content': 0.07508016377687454, 'timestamp': '2025-10-01 04:29:19.152779', 'step': 13121, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:19.184456', 'step': 13121, 'epoch': 2} {'type': 'loss', 'content': 0.13084715604782104, 'timestamp': '2025-10-01 04:29:19.186578', 'step': 13122, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:19.221120', 'step': 13122, 'epoch': 2} {'type': 'loss', 'content': 0.06442694365978241, 'timestamp': '2025-10-01 04:29:19.223122', 'step': 13123, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:19.255886', 'step': 13123, 'epoch': 2} {'type': 'loss', 'content': 0.09615138173103333, 'timestamp': '2025-10-01 04:29:19.279481', 'step': 13124, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:29:19.315368', 'step': 13124, 'epoch': 2} {'type': 'loss', 'content': 0.09178714454174042, 'timestamp': '2025-10-01 04:29:19.317419', 'step': 13125, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:29:19.349340', 'step': 13125, 'epoch': 2} {'type': 'loss', 'content': 0.04270897060632706, 'timestamp': '2025-10-01 04:29:19.351551', 'step': 13126, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:29:19.388945', 'step': 13126, 'epoch': 2} {'type': 'loss', 'content': 0.06076176092028618, 'timestamp': '2025-10-01 04:29:19.391160', 'step': 13127, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:19.422606', 'step': 13127, 'epoch': 2} {'type': 'loss', 'content': 0.10231669992208481, 'timestamp': '2025-10-01 04:29:19.446505', 'step': 13128, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:29:19.478511', 'step': 13128, 'epoch': 2} {'type': 'loss', 'content': 0.04457929730415344, 'timestamp': '2025-10-01 04:29:19.480576', 'step': 13129, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:29:19.520055', 'step': 13129, 'epoch': 2} {'type': 'loss', 'content': 0.051952723413705826, 'timestamp': '2025-10-01 04:29:19.522364', 'step': 13130, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:19.560994', 'step': 13130, 'epoch': 2} {'type': 'loss', 'content': 0.10961298644542694, 'timestamp': '2025-10-01 04:29:19.563143', 'step': 13131, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:29:19.595876', 'step': 13131, 'epoch': 2} {'type': 'loss', 'content': 0.15333528816699982, 'timestamp': '2025-10-01 04:29:19.620284', 'step': 13132, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:29:19.668401', 'step': 13132, 'epoch': 2} {'type': 'loss', 'content': 0.1720273643732071, 'timestamp': '2025-10-01 04:29:19.670642', 'step': 13133, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:29:19.702854', 'step': 13133, 'epoch': 2} {'type': 'loss', 'content': 0.07339958101511002, 'timestamp': '2025-10-01 04:29:19.704893', 'step': 13134, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:19.737716', 'step': 13134, 'epoch': 2} {'type': 'loss', 'content': 0.09555080533027649, 'timestamp': '2025-10-01 04:29:19.739768', 'step': 13135, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:29:19.771478', 'step': 13135, 'epoch': 2} {'type': 'loss', 'content': 0.10514194518327713, 'timestamp': '2025-10-01 04:29:19.795392', 'step': 13136, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:19.837109', 'step': 13136, 'epoch': 2} {'type': 'loss', 'content': 0.08115518093109131, 'timestamp': '2025-10-01 04:29:19.839047', 'step': 13137, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:19.888479', 'step': 13137, 'epoch': 2} {'type': 'loss', 'content': 0.06283611059188843, 'timestamp': '2025-10-01 04:29:19.890549', 'step': 13138, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:29:19.937800', 'step': 13138, 'epoch': 2} {'type': 'loss', 'content': 0.051462262868881226, 'timestamp': '2025-10-01 04:29:19.939871', 'step': 13139, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:19.990871', 'step': 13139, 'epoch': 2} {'type': 'loss', 'content': 0.08747453987598419, 'timestamp': '2025-10-01 04:29:20.014325', 'step': 13140, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:20.048723', 'step': 13140, 'epoch': 2} {'type': 'loss', 'content': 0.10675860941410065, 'timestamp': '2025-10-01 04:29:20.050870', 'step': 13141, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:20.090804', 'step': 13141, 'epoch': 2} {'type': 'loss', 'content': 0.12054207921028137, 'timestamp': '2025-10-01 04:29:20.093318', 'step': 13142, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:29:20.126220', 'step': 13142, 'epoch': 2} {'type': 'loss', 'content': 0.09974686056375504, 'timestamp': '2025-10-01 04:29:20.128427', 'step': 13143, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:20.160862', 'step': 13143, 'epoch': 2} {'type': 'loss', 'content': 0.2052578628063202, 'timestamp': '2025-10-01 04:29:20.184396', 'step': 13144, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:20.215517', 'step': 13144, 'epoch': 2} {'type': 'loss', 'content': 0.0896865501999855, 'timestamp': '2025-10-01 04:29:20.217767', 'step': 13145, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:20.263078', 'step': 13145, 'epoch': 2} {'type': 'loss', 'content': 0.10719042271375656, 'timestamp': '2025-10-01 04:29:20.265080', 'step': 13146, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:20.299170', 'step': 13146, 'epoch': 2} {'type': 'loss', 'content': 0.09988418966531754, 'timestamp': '2025-10-01 04:29:20.301219', 'step': 13147, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:29:20.336862', 'step': 13147, 'epoch': 2} {'type': 'loss', 'content': 0.11218877881765366, 'timestamp': '2025-10-01 04:29:20.360410', 'step': 13148, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:29:20.406846', 'step': 13148, 'epoch': 2} {'type': 'loss', 'content': 0.1013946458697319, 'timestamp': '2025-10-01 04:29:20.409004', 'step': 13149, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:20.444199', 'step': 13149, 'epoch': 2} {'type': 'loss', 'content': 0.14003166556358337, 'timestamp': '2025-10-01 04:29:20.446579', 'step': 13150, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:29:20.488071', 'step': 13150, 'epoch': 2} {'type': 'loss', 'content': 0.1325327455997467, 'timestamp': '2025-10-01 04:29:20.490500', 'step': 13151, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:29:20.546887', 'step': 13151, 'epoch': 2} {'type': 'loss', 'content': 0.09304540604352951, 'timestamp': '2025-10-01 04:29:20.570518', 'step': 13152, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:29:20.605363', 'step': 13152, 'epoch': 2} {'type': 'loss', 'content': 0.07682511955499649, 'timestamp': '2025-10-01 04:29:20.607422', 'step': 13153, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:29:20.646185', 'step': 13153, 'epoch': 2} {'type': 'loss', 'content': 0.14556553959846497, 'timestamp': '2025-10-01 04:29:20.648383', 'step': 13154, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:29:20.690437', 'step': 13154, 'epoch': 2} {'type': 'loss', 'content': 0.12287379801273346, 'timestamp': '2025-10-01 04:29:20.692864', 'step': 13155, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:20.726164', 'step': 13155, 'epoch': 2} {'type': 'loss', 'content': 0.15875303745269775, 'timestamp': '2025-10-01 04:29:20.749838', 'step': 13156, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:29:20.790153', 'step': 13156, 'epoch': 2} {'type': 'loss', 'content': 0.08624663203954697, 'timestamp': '2025-10-01 04:29:20.792181', 'step': 13157, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:29:20.825186', 'step': 13157, 'epoch': 2} {'type': 'loss', 'content': 0.11620625108480453, 'timestamp': '2025-10-01 04:29:20.827220', 'step': 13158, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:20.867975', 'step': 13158, 'epoch': 2} {'type': 'loss', 'content': 0.09745965152978897, 'timestamp': '2025-10-01 04:29:20.870301', 'step': 13159, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:20.902687', 'step': 13159, 'epoch': 2} {'type': 'loss', 'content': 0.038623686879873276, 'timestamp': '2025-10-01 04:29:20.926194', 'step': 13160, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:29:20.959881', 'step': 13160, 'epoch': 2} {'type': 'loss', 'content': 0.10683589428663254, 'timestamp': '2025-10-01 04:29:20.962003', 'step': 13161, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:20.995441', 'step': 13161, 'epoch': 2} {'type': 'loss', 'content': 0.059812918305397034, 'timestamp': '2025-10-01 04:29:20.997525', 'step': 13162, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:21.029117', 'step': 13162, 'epoch': 2} {'type': 'loss', 'content': 0.11880165338516235, 'timestamp': '2025-10-01 04:29:21.031219', 'step': 13163, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:21.069410', 'step': 13163, 'epoch': 2} {'type': 'loss', 'content': 0.07365281134843826, 'timestamp': '2025-10-01 04:29:21.092945', 'step': 13164, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:21.126789', 'step': 13164, 'epoch': 2} {'type': 'loss', 'content': 0.09668229520320892, 'timestamp': '2025-10-01 04:29:21.132427', 'step': 13165, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:29:21.187620', 'step': 13165, 'epoch': 2} {'type': 'loss', 'content': 0.09621169418096542, 'timestamp': '2025-10-01 04:29:21.189741', 'step': 13166, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:29:21.221481', 'step': 13166, 'epoch': 2} {'type': 'loss', 'content': 0.09072031825780869, 'timestamp': '2025-10-01 04:29:21.224417', 'step': 13167, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:29:21.268061', 'step': 13167, 'epoch': 2} {'type': 'loss', 'content': 0.09399925172328949, 'timestamp': '2025-10-01 04:29:21.291880', 'step': 13168, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:21.323791', 'step': 13168, 'epoch': 2} {'type': 'loss', 'content': 0.10872799903154373, 'timestamp': '2025-10-01 04:29:21.325856', 'step': 13169, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:21.356475', 'step': 13169, 'epoch': 2} {'type': 'loss', 'content': 0.08832595497369766, 'timestamp': '2025-10-01 04:29:21.359444', 'step': 13170, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:29:21.390139', 'step': 13170, 'epoch': 2} {'type': 'loss', 'content': 0.1108054518699646, 'timestamp': '2025-10-01 04:29:21.392307', 'step': 13171, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:21.426142', 'step': 13171, 'epoch': 2} {'type': 'loss', 'content': 0.03997000679373741, 'timestamp': '2025-10-01 04:29:21.449644', 'step': 13172, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:29:21.483286', 'step': 13172, 'epoch': 2} {'type': 'loss', 'content': 0.044415079057216644, 'timestamp': '2025-10-01 04:29:21.485422', 'step': 13173, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:21.516932', 'step': 13173, 'epoch': 2} {'type': 'loss', 'content': 0.0717543512582779, 'timestamp': '2025-10-01 04:29:21.518940', 'step': 13174, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:29:21.553689', 'step': 13174, 'epoch': 2} {'type': 'loss', 'content': 0.08141470700502396, 'timestamp': '2025-10-01 04:29:21.556568', 'step': 13175, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:29:21.588628', 'step': 13175, 'epoch': 2} {'type': 'loss', 'content': 0.059529002755880356, 'timestamp': '2025-10-01 04:29:21.612870', 'step': 13176, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:21.647510', 'step': 13176, 'epoch': 2} {'type': 'loss', 'content': 0.10062035173177719, 'timestamp': '2025-10-01 04:29:21.649708', 'step': 13177, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:29:21.682642', 'step': 13177, 'epoch': 2} {'type': 'loss', 'content': 0.044667959213256836, 'timestamp': '2025-10-01 04:29:21.684904', 'step': 13178, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:21.716286', 'step': 13178, 'epoch': 2} {'type': 'loss', 'content': 0.0887589305639267, 'timestamp': '2025-10-01 04:29:21.718373', 'step': 13179, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:21.750586', 'step': 13179, 'epoch': 2} {'type': 'loss', 'content': 0.1338951736688614, 'timestamp': '2025-10-01 04:29:21.774183', 'step': 13180, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:29:21.806444', 'step': 13180, 'epoch': 2} {'type': 'loss', 'content': 0.07471779733896255, 'timestamp': '2025-10-01 04:29:21.808492', 'step': 13181, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:21.841570', 'step': 13181, 'epoch': 2} {'type': 'loss', 'content': 0.15270070731639862, 'timestamp': '2025-10-01 04:29:21.843659', 'step': 13182, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:21.878842', 'step': 13182, 'epoch': 2} {'type': 'loss', 'content': 0.10542496293783188, 'timestamp': '2025-10-01 04:29:21.880837', 'step': 13183, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:21.912194', 'step': 13183, 'epoch': 2} {'type': 'loss', 'content': 0.055938296020030975, 'timestamp': '2025-10-01 04:29:21.935915', 'step': 13184, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:21.968743', 'step': 13184, 'epoch': 2} {'type': 'loss', 'content': 0.06769556552171707, 'timestamp': '2025-10-01 04:29:21.970875', 'step': 13185, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:29:22.002433', 'step': 13185, 'epoch': 2} {'type': 'loss', 'content': 0.1535886526107788, 'timestamp': '2025-10-01 04:29:22.004785', 'step': 13186, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:29:22.037743', 'step': 13186, 'epoch': 2} {'type': 'loss', 'content': 0.10372714698314667, 'timestamp': '2025-10-01 04:29:22.040205', 'step': 13187, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:22.071621', 'step': 13187, 'epoch': 2} {'type': 'loss', 'content': 0.06869517266750336, 'timestamp': '2025-10-01 04:29:22.100704', 'step': 13188, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:22.132251', 'step': 13188, 'epoch': 2} {'type': 'loss', 'content': 0.07126126438379288, 'timestamp': '2025-10-01 04:29:22.134352', 'step': 13189, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:22.166258', 'step': 13189, 'epoch': 2} {'type': 'loss', 'content': 0.13093219697475433, 'timestamp': '2025-10-01 04:29:22.168300', 'step': 13190, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:29:22.201051', 'step': 13190, 'epoch': 2} {'type': 'loss', 'content': 0.08220477402210236, 'timestamp': '2025-10-01 04:29:22.203565', 'step': 13191, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:22.243162', 'step': 13191, 'epoch': 2} {'type': 'loss', 'content': 0.0678710862994194, 'timestamp': '2025-10-01 04:29:22.266781', 'step': 13192, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:22.298822', 'step': 13192, 'epoch': 2} {'type': 'loss', 'content': 0.06995068490505219, 'timestamp': '2025-10-01 04:29:22.300855', 'step': 13193, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:22.339330', 'step': 13193, 'epoch': 2} {'type': 'loss', 'content': 0.08844029158353806, 'timestamp': '2025-10-01 04:29:22.341466', 'step': 13194, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:22.373415', 'step': 13194, 'epoch': 2} {'type': 'loss', 'content': 0.06243057921528816, 'timestamp': '2025-10-01 04:29:22.376217', 'step': 13195, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:29:22.412693', 'step': 13195, 'epoch': 2} {'type': 'loss', 'content': 0.11443869024515152, 'timestamp': '2025-10-01 04:29:22.436138', 'step': 13196, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:29:22.468404', 'step': 13196, 'epoch': 2} {'type': 'loss', 'content': 0.11340036988258362, 'timestamp': '2025-10-01 04:29:22.472219', 'step': 13197, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:29:22.506013', 'step': 13197, 'epoch': 2} {'type': 'loss', 'content': 0.10202404111623764, 'timestamp': '2025-10-01 04:29:22.511138', 'step': 13198, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:22.553617', 'step': 13198, 'epoch': 2} {'type': 'loss', 'content': 0.08924552798271179, 'timestamp': '2025-10-01 04:29:22.557249', 'step': 13199, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:22.598688', 'step': 13199, 'epoch': 2} {'type': 'loss', 'content': 0.0917266234755516, 'timestamp': '2025-10-01 04:29:22.622325', 'step': 13200, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:22.655422', 'step': 13200, 'epoch': 2} {'type': 'loss', 'content': 0.07761452347040176, 'timestamp': '2025-10-01 04:29:22.657749', 'step': 13201, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:29:22.699483', 'step': 13201, 'epoch': 2} {'type': 'loss', 'content': 0.07415889203548431, 'timestamp': '2025-10-01 04:29:22.702314', 'step': 13202, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:22.736218', 'step': 13202, 'epoch': 2} {'type': 'loss', 'content': 0.09744347631931305, 'timestamp': '2025-10-01 04:29:22.739015', 'step': 13203, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:29:22.769436', 'step': 13203, 'epoch': 2} {'type': 'loss', 'content': 0.09950011223554611, 'timestamp': '2025-10-01 04:29:22.795131', 'step': 13204, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:22.826698', 'step': 13204, 'epoch': 2} {'type': 'loss', 'content': 0.09079942107200623, 'timestamp': '2025-10-01 04:29:22.828750', 'step': 13205, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:22.861045', 'step': 13205, 'epoch': 2} {'type': 'loss', 'content': 0.15256761014461517, 'timestamp': '2025-10-01 04:29:22.863046', 'step': 13206, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:22.898578', 'step': 13206, 'epoch': 2} {'type': 'loss', 'content': 0.07051301747560501, 'timestamp': '2025-10-01 04:29:22.902979', 'step': 13207, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:22.933456', 'step': 13207, 'epoch': 2} {'type': 'loss', 'content': 0.08655451238155365, 'timestamp': '2025-10-01 04:29:22.963811', 'step': 13208, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:22.994461', 'step': 13208, 'epoch': 2} {'type': 'loss', 'content': 0.13320134580135345, 'timestamp': '2025-10-01 04:29:22.996985', 'step': 13209, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:23.029900', 'step': 13209, 'epoch': 2} {'type': 'loss', 'content': 0.11731542646884918, 'timestamp': '2025-10-01 04:29:23.031827', 'step': 13210, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:23.067282', 'step': 13210, 'epoch': 2} {'type': 'loss', 'content': 0.11484014242887497, 'timestamp': '2025-10-01 04:29:23.070542', 'step': 13211, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:23.101227', 'step': 13211, 'epoch': 2} {'type': 'loss', 'content': 0.04839470237493515, 'timestamp': '2025-10-01 04:29:23.128592', 'step': 13212, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:29:23.163794', 'step': 13212, 'epoch': 2} {'type': 'loss', 'content': 0.06460412591695786, 'timestamp': '2025-10-01 04:29:23.166040', 'step': 13213, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:23.199645', 'step': 13213, 'epoch': 2} {'type': 'loss', 'content': 0.0710783377289772, 'timestamp': '2025-10-01 04:29:23.201759', 'step': 13214, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:29:23.239115', 'step': 13214, 'epoch': 2} {'type': 'loss', 'content': 0.07139814645051956, 'timestamp': '2025-10-01 04:29:23.241581', 'step': 13215, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:23.276826', 'step': 13215, 'epoch': 2} {'type': 'loss', 'content': 0.06285656243562698, 'timestamp': '2025-10-01 04:29:23.300667', 'step': 13216, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:23.336929', 'step': 13216, 'epoch': 2} {'type': 'loss', 'content': 0.12756013870239258, 'timestamp': '2025-10-01 04:29:23.339941', 'step': 13217, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:29:23.372634', 'step': 13217, 'epoch': 2} {'type': 'loss', 'content': 0.17746227979660034, 'timestamp': '2025-10-01 04:29:23.374974', 'step': 13218, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:23.408560', 'step': 13218, 'epoch': 2} {'type': 'loss', 'content': 0.07298977673053741, 'timestamp': '2025-10-01 04:29:23.410682', 'step': 13219, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:23.456717', 'step': 13219, 'epoch': 2} {'type': 'loss', 'content': 0.06962969899177551, 'timestamp': '2025-10-01 04:29:23.480355', 'step': 13220, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:23.526575', 'step': 13220, 'epoch': 2} {'type': 'loss', 'content': 0.0882304385304451, 'timestamp': '2025-10-01 04:29:23.528519', 'step': 13221, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:29:23.560019', 'step': 13221, 'epoch': 2} {'type': 'loss', 'content': 0.12981566786766052, 'timestamp': '2025-10-01 04:29:23.562130', 'step': 13222, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:23.592989', 'step': 13222, 'epoch': 2} {'type': 'loss', 'content': 0.028481265529990196, 'timestamp': '2025-10-01 04:29:23.595019', 'step': 13223, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:23.624883', 'step': 13223, 'epoch': 2} {'type': 'loss', 'content': 0.0719735324382782, 'timestamp': '2025-10-01 04:29:23.648493', 'step': 13224, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:23.680870', 'step': 13224, 'epoch': 2} {'type': 'loss', 'content': 0.21301642060279846, 'timestamp': '2025-10-01 04:29:23.683025', 'step': 13225, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:29:23.713629', 'step': 13225, 'epoch': 2} {'type': 'loss', 'content': 0.13143517076969147, 'timestamp': '2025-10-01 04:29:23.716521', 'step': 13226, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:29:23.747465', 'step': 13226, 'epoch': 2} {'type': 'loss', 'content': 0.12498095631599426, 'timestamp': '2025-10-01 04:29:23.749944', 'step': 13227, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:29:23.781298', 'step': 13227, 'epoch': 2} {'type': 'loss', 'content': 0.11594343930482864, 'timestamp': '2025-10-01 04:29:23.805124', 'step': 13228, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:23.845505', 'step': 13228, 'epoch': 2} {'type': 'loss', 'content': 0.14481399953365326, 'timestamp': '2025-10-01 04:29:23.847554', 'step': 13229, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:29:23.882580', 'step': 13229, 'epoch': 2} {'type': 'loss', 'content': 0.039847902953624725, 'timestamp': '2025-10-01 04:29:23.885350', 'step': 13230, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:29:23.922523', 'step': 13230, 'epoch': 2} {'type': 'loss', 'content': 0.1788720339536667, 'timestamp': '2025-10-01 04:29:23.925204', 'step': 13231, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:23.973435', 'step': 13231, 'epoch': 2} {'type': 'loss', 'content': 0.21711841225624084, 'timestamp': '2025-10-01 04:29:23.997253', 'step': 13232, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:24.047371', 'step': 13232, 'epoch': 2} {'type': 'loss', 'content': 0.08442015200853348, 'timestamp': '2025-10-01 04:29:24.049616', 'step': 13233, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:24.115021', 'step': 13233, 'epoch': 2} {'type': 'loss', 'content': 0.06543213129043579, 'timestamp': '2025-10-01 04:29:24.118947', 'step': 13234, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:29:24.168111', 'step': 13234, 'epoch': 2} {'type': 'loss', 'content': 0.1525791436433792, 'timestamp': '2025-10-01 04:29:24.170121', 'step': 13235, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:24.204267', 'step': 13235, 'epoch': 2} {'type': 'loss', 'content': 0.06404099613428116, 'timestamp': '2025-10-01 04:29:24.227821', 'step': 13236, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:24.267381', 'step': 13236, 'epoch': 2} {'type': 'loss', 'content': 0.09118321537971497, 'timestamp': '2025-10-01 04:29:24.277053', 'step': 13237, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:24.337128', 'step': 13237, 'epoch': 2} {'type': 'loss', 'content': 0.09900187700986862, 'timestamp': '2025-10-01 04:29:24.339348', 'step': 13238, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:29:24.379631', 'step': 13238, 'epoch': 2} {'type': 'loss', 'content': 0.11845353245735168, 'timestamp': '2025-10-01 04:29:24.382429', 'step': 13239, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:29:24.444138', 'step': 13239, 'epoch': 2} {'type': 'loss', 'content': 0.08030720800161362, 'timestamp': '2025-10-01 04:29:24.468358', 'step': 13240, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:29:24.517183', 'step': 13240, 'epoch': 2} {'type': 'loss', 'content': 0.12277461588382721, 'timestamp': '2025-10-01 04:29:24.520669', 'step': 13241, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:24.575533', 'step': 13241, 'epoch': 2} {'type': 'loss', 'content': 0.06511453539133072, 'timestamp': '2025-10-01 04:29:24.577905', 'step': 13242, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:24.633859', 'step': 13242, 'epoch': 2} {'type': 'loss', 'content': 0.14122621715068817, 'timestamp': '2025-10-01 04:29:24.636242', 'step': 13243, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:29:24.667586', 'step': 13243, 'epoch': 2} {'type': 'loss', 'content': 0.05799791216850281, 'timestamp': '2025-10-01 04:29:24.691234', 'step': 13244, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:29:24.738071', 'step': 13244, 'epoch': 2} {'type': 'loss', 'content': 0.12310168892145157, 'timestamp': '2025-10-01 04:29:24.740170', 'step': 13245, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:24.782439', 'step': 13245, 'epoch': 2} {'type': 'loss', 'content': 0.1355428844690323, 'timestamp': '2025-10-01 04:29:24.795743', 'step': 13246, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:24.829477', 'step': 13246, 'epoch': 2} {'type': 'loss', 'content': 0.12114594131708145, 'timestamp': '2025-10-01 04:29:24.832101', 'step': 13247, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:24.871600', 'step': 13247, 'epoch': 2} {'type': 'loss', 'content': 0.040318138897418976, 'timestamp': '2025-10-01 04:29:24.900330', 'step': 13248, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:24.970683', 'step': 13248, 'epoch': 2} {'type': 'loss', 'content': 0.0925755575299263, 'timestamp': '2025-10-01 04:29:24.979258', 'step': 13249, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:25.023991', 'step': 13249, 'epoch': 2} {'type': 'loss', 'content': 0.1426583081483841, 'timestamp': '2025-10-01 04:29:25.026753', 'step': 13250, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:25.066522', 'step': 13250, 'epoch': 2} {'type': 'loss', 'content': 0.10460633784532547, 'timestamp': '2025-10-01 04:29:25.068552', 'step': 13251, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:25.139704', 'step': 13251, 'epoch': 2} {'type': 'loss', 'content': 0.09070434421300888, 'timestamp': '2025-10-01 04:29:25.177120', 'step': 13252, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:29:25.207752', 'step': 13252, 'epoch': 2} {'type': 'loss', 'content': 0.07853738218545914, 'timestamp': '2025-10-01 04:29:25.209645', 'step': 13253, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:25.240878', 'step': 13253, 'epoch': 2} {'type': 'loss', 'content': 0.07508091628551483, 'timestamp': '2025-10-01 04:29:25.243065', 'step': 13254, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:25.278614', 'step': 13254, 'epoch': 2} {'type': 'loss', 'content': 0.08339181542396545, 'timestamp': '2025-10-01 04:29:25.280662', 'step': 13255, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:29:25.312537', 'step': 13255, 'epoch': 2} {'type': 'loss', 'content': 0.13426271080970764, 'timestamp': '2025-10-01 04:29:25.344448', 'step': 13256, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:25.388279', 'step': 13256, 'epoch': 2} {'type': 'loss', 'content': 0.088423952460289, 'timestamp': '2025-10-01 04:29:25.391790', 'step': 13257, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:25.423123', 'step': 13257, 'epoch': 2} {'type': 'loss', 'content': 0.0380220040678978, 'timestamp': '2025-10-01 04:29:25.425412', 'step': 13258, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:25.456995', 'step': 13258, 'epoch': 2} {'type': 'loss', 'content': 0.09250693023204803, 'timestamp': '2025-10-01 04:29:25.459932', 'step': 13259, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:29:25.495176', 'step': 13259, 'epoch': 2} {'type': 'loss', 'content': 0.0663674995303154, 'timestamp': '2025-10-01 04:29:25.519293', 'step': 13260, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:25.550997', 'step': 13260, 'epoch': 2} {'type': 'loss', 'content': 0.21902094781398773, 'timestamp': '2025-10-01 04:29:25.558860', 'step': 13261, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:29:25.589599', 'step': 13261, 'epoch': 2} {'type': 'loss', 'content': 0.1445552408695221, 'timestamp': '2025-10-01 04:29:25.591889', 'step': 13262, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:25.622436', 'step': 13262, 'epoch': 2} {'type': 'loss', 'content': 0.006350278854370117, 'timestamp': '2025-10-01 04:29:25.624439', 'step': 13263, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:25.655539', 'step': 13263, 'epoch': 2} {'type': 'loss', 'content': 0.05071783438324928, 'timestamp': '2025-10-01 04:29:25.679308', 'step': 13264, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:25.710176', 'step': 13264, 'epoch': 2} {'type': 'loss', 'content': 0.07789921015501022, 'timestamp': '2025-10-01 04:29:25.712129', 'step': 13265, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:29:25.757084', 'step': 13265, 'epoch': 2} {'type': 'loss', 'content': 0.13121038675308228, 'timestamp': '2025-10-01 04:29:25.760489', 'step': 13266, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:29:25.791403', 'step': 13266, 'epoch': 2} {'type': 'loss', 'content': 0.08064479380846024, 'timestamp': '2025-10-01 04:29:25.793922', 'step': 13267, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:25.825327', 'step': 13267, 'epoch': 2} {'type': 'loss', 'content': 0.13210844993591309, 'timestamp': '2025-10-01 04:29:25.849670', 'step': 13268, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:29:25.884190', 'step': 13268, 'epoch': 2} {'type': 'loss', 'content': 0.10598107427358627, 'timestamp': '2025-10-01 04:29:25.886990', 'step': 13269, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:25.926007', 'step': 13269, 'epoch': 2} {'type': 'loss', 'content': 0.06034017726778984, 'timestamp': '2025-10-01 04:29:25.928765', 'step': 13270, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:25.960901', 'step': 13270, 'epoch': 2} {'type': 'loss', 'content': 0.038514524698257446, 'timestamp': '2025-10-01 04:29:25.963863', 'step': 13271, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:29:25.995571', 'step': 13271, 'epoch': 2} {'type': 'loss', 'content': 0.057173989713191986, 'timestamp': '2025-10-01 04:29:26.029714', 'step': 13272, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:26.069294', 'step': 13272, 'epoch': 2} {'type': 'loss', 'content': 0.11326827853918076, 'timestamp': '2025-10-01 04:29:26.071183', 'step': 13273, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:26.101877', 'step': 13273, 'epoch': 2} {'type': 'loss', 'content': 0.10297755151987076, 'timestamp': '2025-10-01 04:29:26.103844', 'step': 13274, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:29:26.136464', 'step': 13274, 'epoch': 2} {'type': 'loss', 'content': 0.14543481171131134, 'timestamp': '2025-10-01 04:29:26.139506', 'step': 13275, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:26.173222', 'step': 13275, 'epoch': 2} {'type': 'loss', 'content': 0.1111399307847023, 'timestamp': '2025-10-01 04:29:26.196676', 'step': 13276, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:26.228541', 'step': 13276, 'epoch': 2} {'type': 'loss', 'content': 0.08302710205316544, 'timestamp': '2025-10-01 04:29:26.230762', 'step': 13277, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:26.262353', 'step': 13277, 'epoch': 2} {'type': 'loss', 'content': 0.12828697264194489, 'timestamp': '2025-10-01 04:29:26.273742', 'step': 13278, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:26.316324', 'step': 13278, 'epoch': 2} {'type': 'loss', 'content': 0.018937695771455765, 'timestamp': '2025-10-01 04:29:26.319965', 'step': 13279, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:26.351914', 'step': 13279, 'epoch': 2} {'type': 'loss', 'content': 0.15603573620319366, 'timestamp': '2025-10-01 04:29:26.375725', 'step': 13280, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:26.406832', 'step': 13280, 'epoch': 2} {'type': 'loss', 'content': 0.10171286016702652, 'timestamp': '2025-10-01 04:29:26.417187', 'step': 13281, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:29:26.456171', 'step': 13281, 'epoch': 2} {'type': 'loss', 'content': 0.1661994606256485, 'timestamp': '2025-10-01 04:29:26.458241', 'step': 13282, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:29:26.491947', 'step': 13282, 'epoch': 2} {'type': 'loss', 'content': 0.08216025680303574, 'timestamp': '2025-10-01 04:29:26.494125', 'step': 13283, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:26.526216', 'step': 13283, 'epoch': 2} {'type': 'loss', 'content': 0.07718736678361893, 'timestamp': '2025-10-01 04:29:26.550619', 'step': 13284, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:26.583432', 'step': 13284, 'epoch': 2} {'type': 'loss', 'content': 0.11493635177612305, 'timestamp': '2025-10-01 04:29:26.586090', 'step': 13285, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:29:26.618480', 'step': 13285, 'epoch': 2} {'type': 'loss', 'content': 0.14902684092521667, 'timestamp': '2025-10-01 04:29:26.620883', 'step': 13286, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:29:26.659526', 'step': 13286, 'epoch': 2} {'type': 'loss', 'content': 0.11556226760149002, 'timestamp': '2025-10-01 04:29:26.661648', 'step': 13287, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:26.693196', 'step': 13287, 'epoch': 2} {'type': 'loss', 'content': 0.040606025606393814, 'timestamp': '2025-10-01 04:29:26.716989', 'step': 13288, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:26.747532', 'step': 13288, 'epoch': 2} {'type': 'loss', 'content': 0.07477322965860367, 'timestamp': '2025-10-01 04:29:26.749641', 'step': 13289, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:26.782630', 'step': 13289, 'epoch': 2} {'type': 'loss', 'content': 0.1028265431523323, 'timestamp': '2025-10-01 04:29:26.784554', 'step': 13290, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:26.816813', 'step': 13290, 'epoch': 2} {'type': 'loss', 'content': 0.13922247290611267, 'timestamp': '2025-10-01 04:29:26.820355', 'step': 13291, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:29:26.851125', 'step': 13291, 'epoch': 2} {'type': 'loss', 'content': 0.08310173451900482, 'timestamp': '2025-10-01 04:29:26.875034', 'step': 13292, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-10-01 04:29:26.906376', 'step': 13292, 'epoch': 2} {'type': 'loss', 'content': 0.1378374844789505, 'timestamp': '2025-10-01 04:29:26.908617', 'step': 13293, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:26.940370', 'step': 13293, 'epoch': 2} {'type': 'loss', 'content': 0.07471177726984024, 'timestamp': '2025-10-01 04:29:26.942840', 'step': 13294, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:26.974546', 'step': 13294, 'epoch': 2} {'type': 'loss', 'content': 0.04787075147032738, 'timestamp': '2025-10-01 04:29:26.976617', 'step': 13295, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:29:27.008204', 'step': 13295, 'epoch': 2} {'type': 'loss', 'content': 0.15920379757881165, 'timestamp': '2025-10-01 04:29:27.032207', 'step': 13296, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:27.062614', 'step': 13296, 'epoch': 2} {'type': 'loss', 'content': 0.08715864270925522, 'timestamp': '2025-10-01 04:29:27.065590', 'step': 13297, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:27.096997', 'step': 13297, 'epoch': 2} {'type': 'loss', 'content': 0.10072614997625351, 'timestamp': '2025-10-01 04:29:27.100301', 'step': 13298, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:27.131241', 'step': 13298, 'epoch': 2} {'type': 'loss', 'content': 0.058054715394973755, 'timestamp': '2025-10-01 04:29:27.133471', 'step': 13299, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:29:27.164423', 'step': 13299, 'epoch': 2} {'type': 'loss', 'content': 0.1039961725473404, 'timestamp': '2025-10-01 04:29:27.188450', 'step': 13300, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:29:27.220876', 'step': 13300, 'epoch': 2} {'type': 'loss', 'content': 0.08781476318836212, 'timestamp': '2025-10-01 04:29:27.223144', 'step': 13301, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:27.256143', 'step': 13301, 'epoch': 2} {'type': 'loss', 'content': 0.13089962303638458, 'timestamp': '2025-10-01 04:29:27.258290', 'step': 13302, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:27.289213', 'step': 13302, 'epoch': 2} {'type': 'loss', 'content': 0.0770963653922081, 'timestamp': '2025-10-01 04:29:27.292929', 'step': 13303, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:27.326661', 'step': 13303, 'epoch': 2} {'type': 'loss', 'content': 0.08945441246032715, 'timestamp': '2025-10-01 04:29:27.350460', 'step': 13304, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:29:27.392522', 'step': 13304, 'epoch': 2} {'type': 'loss', 'content': 0.15266023576259613, 'timestamp': '2025-10-01 04:29:27.394572', 'step': 13305, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:29:27.426248', 'step': 13305, 'epoch': 2} {'type': 'loss', 'content': 0.17418664693832397, 'timestamp': '2025-10-01 04:29:27.428220', 'step': 13306, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:29:27.460301', 'step': 13306, 'epoch': 2} {'type': 'loss', 'content': 0.13171519339084625, 'timestamp': '2025-10-01 04:29:27.462322', 'step': 13307, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:27.492613', 'step': 13307, 'epoch': 2} {'type': 'loss', 'content': 0.09227442741394043, 'timestamp': '2025-10-01 04:29:27.516162', 'step': 13308, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:27.548757', 'step': 13308, 'epoch': 2} {'type': 'loss', 'content': 0.12359610199928284, 'timestamp': '2025-10-01 04:29:27.550868', 'step': 13309, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:27.583007', 'step': 13309, 'epoch': 2} {'type': 'loss', 'content': 0.19200801849365234, 'timestamp': '2025-10-01 04:29:27.585258', 'step': 13310, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:29:27.619067', 'step': 13310, 'epoch': 2} {'type': 'loss', 'content': 0.1161079853773117, 'timestamp': '2025-10-01 04:29:27.630085', 'step': 13311, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:27.661850', 'step': 13311, 'epoch': 2} {'type': 'loss', 'content': 0.08761753886938095, 'timestamp': '2025-10-01 04:29:27.685478', 'step': 13312, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:29:27.715747', 'step': 13312, 'epoch': 2} {'type': 'loss', 'content': 0.15067633986473083, 'timestamp': '2025-10-01 04:29:27.718541', 'step': 13313, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:27.750470', 'step': 13313, 'epoch': 2} {'type': 'loss', 'content': 0.08891326934099197, 'timestamp': '2025-10-01 04:29:27.753641', 'step': 13314, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:27.784522', 'step': 13314, 'epoch': 2} {'type': 'loss', 'content': 0.12942175567150116, 'timestamp': '2025-10-01 04:29:27.786681', 'step': 13315, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:29:27.818979', 'step': 13315, 'epoch': 2} {'type': 'loss', 'content': 0.03946206346154213, 'timestamp': '2025-10-01 04:29:27.842621', 'step': 13316, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:29:27.873647', 'step': 13316, 'epoch': 2} {'type': 'loss', 'content': 0.10530449450016022, 'timestamp': '2025-10-01 04:29:27.875766', 'step': 13317, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:27.908785', 'step': 13317, 'epoch': 2} {'type': 'loss', 'content': 0.10442905128002167, 'timestamp': '2025-10-01 04:29:27.910873', 'step': 13318, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:27.949948', 'step': 13318, 'epoch': 2} {'type': 'loss', 'content': 0.11263994872570038, 'timestamp': '2025-10-01 04:29:27.955463', 'step': 13319, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:29:27.987188', 'step': 13319, 'epoch': 2} {'type': 'loss', 'content': 0.1827966272830963, 'timestamp': '2025-10-01 04:29:28.011025', 'step': 13320, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:28.041271', 'step': 13320, 'epoch': 2} {'type': 'loss', 'content': 0.10640204697847366, 'timestamp': '2025-10-01 04:29:28.043306', 'step': 13321, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:28.073800', 'step': 13321, 'epoch': 2} {'type': 'loss', 'content': 0.10923656821250916, 'timestamp': '2025-10-01 04:29:28.075815', 'step': 13322, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:29:28.133138', 'step': 13322, 'epoch': 2} {'type': 'loss', 'content': 0.09067755192518234, 'timestamp': '2025-10-01 04:29:28.135565', 'step': 13323, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:29:28.166344', 'step': 13323, 'epoch': 2} {'type': 'loss', 'content': 0.06235645338892937, 'timestamp': '2025-10-01 04:29:28.190567', 'step': 13324, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:28.221842', 'step': 13324, 'epoch': 2} {'type': 'loss', 'content': 0.12308982014656067, 'timestamp': '2025-10-01 04:29:28.223910', 'step': 13325, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:28.254629', 'step': 13325, 'epoch': 2} {'type': 'loss', 'content': 0.11711002141237259, 'timestamp': '2025-10-01 04:29:28.256736', 'step': 13326, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:28.290469', 'step': 13326, 'epoch': 2} {'type': 'loss', 'content': 0.0967470109462738, 'timestamp': '2025-10-01 04:29:28.292745', 'step': 13327, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:28.323892', 'step': 13327, 'epoch': 2} {'type': 'loss', 'content': 0.1376660317182541, 'timestamp': '2025-10-01 04:29:28.348077', 'step': 13328, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:29:28.382049', 'step': 13328, 'epoch': 2} {'type': 'loss', 'content': 0.0996517464518547, 'timestamp': '2025-10-01 04:29:28.392201', 'step': 13329, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:29:28.423242', 'step': 13329, 'epoch': 2} {'type': 'loss', 'content': 0.15113115310668945, 'timestamp': '2025-10-01 04:29:28.425615', 'step': 13330, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:28.465991', 'step': 13330, 'epoch': 2} {'type': 'loss', 'content': 0.07947733253240585, 'timestamp': '2025-10-01 04:29:28.468111', 'step': 13331, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:29:28.500524', 'step': 13331, 'epoch': 2} {'type': 'loss', 'content': 0.06987384706735611, 'timestamp': '2025-10-01 04:29:28.524315', 'step': 13332, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:28.556965', 'step': 13332, 'epoch': 2} {'type': 'loss', 'content': 0.12599579989910126, 'timestamp': '2025-10-01 04:29:28.559142', 'step': 13333, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:28.590190', 'step': 13333, 'epoch': 2} {'type': 'loss', 'content': 0.035210657864809036, 'timestamp': '2025-10-01 04:29:28.592478', 'step': 13334, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:29:28.623325', 'step': 13334, 'epoch': 2} {'type': 'loss', 'content': 0.10882895439863205, 'timestamp': '2025-10-01 04:29:28.625516', 'step': 13335, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:28.656304', 'step': 13335, 'epoch': 2} {'type': 'loss', 'content': 0.06967513263225555, 'timestamp': '2025-10-01 04:29:28.679996', 'step': 13336, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:28.711882', 'step': 13336, 'epoch': 2} {'type': 'loss', 'content': 0.06979510933160782, 'timestamp': '2025-10-01 04:29:28.715332', 'step': 13337, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:28.746767', 'step': 13337, 'epoch': 2} {'type': 'loss', 'content': 0.06520862132310867, 'timestamp': '2025-10-01 04:29:28.749174', 'step': 13338, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:28.780209', 'step': 13338, 'epoch': 2} {'type': 'loss', 'content': 0.11336684226989746, 'timestamp': '2025-10-01 04:29:28.782031', 'step': 13339, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:29:28.812401', 'step': 13339, 'epoch': 2} {'type': 'loss', 'content': 0.1025213897228241, 'timestamp': '2025-10-01 04:29:28.835997', 'step': 13340, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:29:28.869602', 'step': 13340, 'epoch': 2} {'type': 'loss', 'content': 0.17880743741989136, 'timestamp': '2025-10-01 04:29:28.871795', 'step': 13341, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:28.903507', 'step': 13341, 'epoch': 2} {'type': 'loss', 'content': 0.11777652055025101, 'timestamp': '2025-10-01 04:29:28.905757', 'step': 13342, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:29:28.937452', 'step': 13342, 'epoch': 2} {'type': 'loss', 'content': 0.11257649958133698, 'timestamp': '2025-10-01 04:29:28.939960', 'step': 13343, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:29:28.970790', 'step': 13343, 'epoch': 2} {'type': 'loss', 'content': 0.12028401345014572, 'timestamp': '2025-10-01 04:29:28.994559', 'step': 13344, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:29:29.027542', 'step': 13344, 'epoch': 2} {'type': 'loss', 'content': 0.04703231528401375, 'timestamp': '2025-10-01 04:29:29.030222', 'step': 13345, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:29.062954', 'step': 13345, 'epoch': 2} {'type': 'loss', 'content': 0.1919984072446823, 'timestamp': '2025-10-01 04:29:29.065216', 'step': 13346, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:29.099002', 'step': 13346, 'epoch': 2} {'type': 'loss', 'content': 0.06734566390514374, 'timestamp': '2025-10-01 04:29:29.101052', 'step': 13347, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:29.138814', 'step': 13347, 'epoch': 2} {'type': 'loss', 'content': 0.10736257582902908, 'timestamp': '2025-10-01 04:29:29.162418', 'step': 13348, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:29:29.196264', 'step': 13348, 'epoch': 2} {'type': 'loss', 'content': 0.22523008286952972, 'timestamp': '2025-10-01 04:29:29.198346', 'step': 13349, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:29.229541', 'step': 13349, 'epoch': 2} {'type': 'loss', 'content': 0.05465321987867355, 'timestamp': '2025-10-01 04:29:29.232124', 'step': 13350, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:29:29.263064', 'step': 13350, 'epoch': 2} {'type': 'loss', 'content': 0.1254139244556427, 'timestamp': '2025-10-01 04:29:29.266764', 'step': 13351, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:29.302585', 'step': 13351, 'epoch': 2} {'type': 'loss', 'content': 0.07543039321899414, 'timestamp': '2025-10-01 04:29:29.326121', 'step': 13352, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:29.356586', 'step': 13352, 'epoch': 2} {'type': 'loss', 'content': 0.08490195125341415, 'timestamp': '2025-10-01 04:29:29.358746', 'step': 13353, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:29.389405', 'step': 13353, 'epoch': 2} {'type': 'loss', 'content': 0.08237744122743607, 'timestamp': '2025-10-01 04:29:29.392409', 'step': 13354, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:29.423091', 'step': 13354, 'epoch': 2} {'type': 'loss', 'content': 0.1529463678598404, 'timestamp': '2025-10-01 04:29:29.425296', 'step': 13355, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:29:29.456176', 'step': 13355, 'epoch': 2} {'type': 'loss', 'content': 0.12351536750793457, 'timestamp': '2025-10-01 04:29:29.479835', 'step': 13356, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:29.512059', 'step': 13356, 'epoch': 2} {'type': 'loss', 'content': 0.14052172005176544, 'timestamp': '2025-10-01 04:29:29.514089', 'step': 13357, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:29:29.546095', 'step': 13357, 'epoch': 2} {'type': 'loss', 'content': 0.10130561143159866, 'timestamp': '2025-10-01 04:29:29.548158', 'step': 13358, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:29:29.581052', 'step': 13358, 'epoch': 2} {'type': 'loss', 'content': 0.031484123319387436, 'timestamp': '2025-10-01 04:29:29.583428', 'step': 13359, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:29.614442', 'step': 13359, 'epoch': 2} {'type': 'loss', 'content': 0.17160436511039734, 'timestamp': '2025-10-01 04:29:29.638650', 'step': 13360, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:29:29.673418', 'step': 13360, 'epoch': 2} {'type': 'loss', 'content': 0.099665068089962, 'timestamp': '2025-10-01 04:29:29.675335', 'step': 13361, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:29:29.707606', 'step': 13361, 'epoch': 2} {'type': 'loss', 'content': 0.07406575977802277, 'timestamp': '2025-10-01 04:29:29.715427', 'step': 13362, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:29:29.746863', 'step': 13362, 'epoch': 2} {'type': 'loss', 'content': 0.09299498051404953, 'timestamp': '2025-10-01 04:29:29.751500', 'step': 13363, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:29:29.785616', 'step': 13363, 'epoch': 2} {'type': 'loss', 'content': 0.15972058475017548, 'timestamp': '2025-10-01 04:29:29.809518', 'step': 13364, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:29.843410', 'step': 13364, 'epoch': 2} {'type': 'loss', 'content': 0.14262281358242035, 'timestamp': '2025-10-01 04:29:29.845455', 'step': 13365, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:29:29.882359', 'step': 13365, 'epoch': 2} {'type': 'loss', 'content': 0.14339523017406464, 'timestamp': '2025-10-01 04:29:29.884399', 'step': 13366, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:29:29.916904', 'step': 13366, 'epoch': 2} {'type': 'loss', 'content': 0.12568363547325134, 'timestamp': '2025-10-01 04:29:29.919149', 'step': 13367, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:29.956389', 'step': 13367, 'epoch': 2} {'type': 'loss', 'content': 0.14781782031059265, 'timestamp': '2025-10-01 04:29:29.980359', 'step': 13368, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:30.011862', 'step': 13368, 'epoch': 2} {'type': 'loss', 'content': 0.09283280372619629, 'timestamp': '2025-10-01 04:29:30.013806', 'step': 13369, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:30.062630', 'step': 13369, 'epoch': 2} {'type': 'loss', 'content': 0.08279801905155182, 'timestamp': '2025-10-01 04:29:30.066455', 'step': 13370, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:30.101969', 'step': 13370, 'epoch': 2} {'type': 'loss', 'content': 0.11422492563724518, 'timestamp': '2025-10-01 04:29:30.103999', 'step': 13371, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:29:30.136884', 'step': 13371, 'epoch': 2} {'type': 'loss', 'content': 0.11446299403905869, 'timestamp': '2025-10-01 04:29:30.160788', 'step': 13372, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:30.193075', 'step': 13372, 'epoch': 2} {'type': 'loss', 'content': 0.08796979486942291, 'timestamp': '2025-10-01 04:29:30.195289', 'step': 13373, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:30.225717', 'step': 13373, 'epoch': 2} {'type': 'loss', 'content': 0.2483757585287094, 'timestamp': '2025-10-01 04:29:30.228888', 'step': 13374, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:29:30.260233', 'step': 13374, 'epoch': 2} {'type': 'loss', 'content': 0.14680969715118408, 'timestamp': '2025-10-01 04:29:30.262585', 'step': 13375, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:29:30.293508', 'step': 13375, 'epoch': 2} {'type': 'loss', 'content': 0.0902949646115303, 'timestamp': '2025-10-01 04:29:30.324654', 'step': 13376, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:29:30.356591', 'step': 13376, 'epoch': 2} {'type': 'loss', 'content': 0.09701176732778549, 'timestamp': '2025-10-01 04:29:30.358900', 'step': 13377, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:30.391950', 'step': 13377, 'epoch': 2} {'type': 'loss', 'content': 0.04132630303502083, 'timestamp': '2025-10-01 04:29:30.394189', 'step': 13378, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:30.428522', 'step': 13378, 'epoch': 2} {'type': 'loss', 'content': 0.14916996657848358, 'timestamp': '2025-10-01 04:29:30.434152', 'step': 13379, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:30.470485', 'step': 13379, 'epoch': 2} {'type': 'loss', 'content': 0.10396817326545715, 'timestamp': '2025-10-01 04:29:30.494640', 'step': 13380, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:30.525837', 'step': 13380, 'epoch': 2} {'type': 'loss', 'content': 0.0992753803730011, 'timestamp': '2025-10-01 04:29:30.527944', 'step': 13381, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:29:30.559093', 'step': 13381, 'epoch': 2} {'type': 'loss', 'content': 0.13863766193389893, 'timestamp': '2025-10-01 04:29:30.561251', 'step': 13382, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:30.602702', 'step': 13382, 'epoch': 2} {'type': 'loss', 'content': 0.19448328018188477, 'timestamp': '2025-10-01 04:29:30.604829', 'step': 13383, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:30.636309', 'step': 13383, 'epoch': 2} {'type': 'loss', 'content': 0.0998513475060463, 'timestamp': '2025-10-01 04:29:30.659784', 'step': 13384, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:30.692827', 'step': 13384, 'epoch': 2} {'type': 'loss', 'content': 0.045572828501462936, 'timestamp': '2025-10-01 04:29:30.697430', 'step': 13385, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:30.728811', 'step': 13385, 'epoch': 2} {'type': 'loss', 'content': 0.10578670352697372, 'timestamp': '2025-10-01 04:29:30.731010', 'step': 13386, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:30.768522', 'step': 13386, 'epoch': 2} {'type': 'loss', 'content': 0.06109459698200226, 'timestamp': '2025-10-01 04:29:30.770906', 'step': 13387, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:30.809338', 'step': 13387, 'epoch': 2} {'type': 'loss', 'content': 0.057516638189554214, 'timestamp': '2025-10-01 04:29:30.835307', 'step': 13388, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:30.868570', 'step': 13388, 'epoch': 2} {'type': 'loss', 'content': 0.07650434970855713, 'timestamp': '2025-10-01 04:29:30.870771', 'step': 13389, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:30.903856', 'step': 13389, 'epoch': 2} {'type': 'loss', 'content': 0.03679697960615158, 'timestamp': '2025-10-01 04:29:30.908967', 'step': 13390, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:30.943002', 'step': 13390, 'epoch': 2} {'type': 'loss', 'content': 0.09343352913856506, 'timestamp': '2025-10-01 04:29:30.945858', 'step': 13391, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:30.977221', 'step': 13391, 'epoch': 2} {'type': 'loss', 'content': 0.11393924802541733, 'timestamp': '2025-10-01 04:29:31.000818', 'step': 13392, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:29:31.031757', 'step': 13392, 'epoch': 2} {'type': 'loss', 'content': 0.18494552373886108, 'timestamp': '2025-10-01 04:29:31.033869', 'step': 13393, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:31.065050', 'step': 13393, 'epoch': 2} {'type': 'loss', 'content': 0.10218948125839233, 'timestamp': '2025-10-01 04:29:31.074988', 'step': 13394, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:29:31.106663', 'step': 13394, 'epoch': 2} {'type': 'loss', 'content': 0.08542744070291519, 'timestamp': '2025-10-01 04:29:31.108780', 'step': 13395, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:31.140566', 'step': 13395, 'epoch': 2} {'type': 'loss', 'content': 0.07563449442386627, 'timestamp': '2025-10-01 04:29:31.164334', 'step': 13396, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:29:31.195395', 'step': 13396, 'epoch': 2} {'type': 'loss', 'content': 0.06359736621379852, 'timestamp': '2025-10-01 04:29:31.197520', 'step': 13397, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:31.230055', 'step': 13397, 'epoch': 2} {'type': 'loss', 'content': 0.08237536251544952, 'timestamp': '2025-10-01 04:29:31.232269', 'step': 13398, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:31.263392', 'step': 13398, 'epoch': 2} {'type': 'loss', 'content': 0.08636142313480377, 'timestamp': '2025-10-01 04:29:31.265562', 'step': 13399, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:31.299884', 'step': 13399, 'epoch': 2} {'type': 'loss', 'content': 0.1805163025856018, 'timestamp': '2025-10-01 04:29:31.323944', 'step': 13400, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:31.356170', 'step': 13400, 'epoch': 2} {'type': 'loss', 'content': 0.13698355853557587, 'timestamp': '2025-10-01 04:29:31.358425', 'step': 13401, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:29:31.393184', 'step': 13401, 'epoch': 2} {'type': 'loss', 'content': 0.1149422898888588, 'timestamp': '2025-10-01 04:29:31.395245', 'step': 13402, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:29:31.427730', 'step': 13402, 'epoch': 2} {'type': 'loss', 'content': 0.044926051050424576, 'timestamp': '2025-10-01 04:29:31.430438', 'step': 13403, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:29:31.461102', 'step': 13403, 'epoch': 2} {'type': 'loss', 'content': 0.06055929511785507, 'timestamp': '2025-10-01 04:29:31.484810', 'step': 13404, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:29:31.516863', 'step': 13404, 'epoch': 2} {'type': 'loss', 'content': 0.19630783796310425, 'timestamp': '2025-10-01 04:29:31.518899', 'step': 13405, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:29:31.551871', 'step': 13405, 'epoch': 2} {'type': 'loss', 'content': 0.08103486895561218, 'timestamp': '2025-10-01 04:29:31.554235', 'step': 13406, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:29:31.600270', 'step': 13406, 'epoch': 2} {'type': 'loss', 'content': 0.10166346281766891, 'timestamp': '2025-10-01 04:29:31.602637', 'step': 13407, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:31.634746', 'step': 13407, 'epoch': 2} {'type': 'loss', 'content': 0.16925708949565887, 'timestamp': '2025-10-01 04:29:31.658353', 'step': 13408, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:31.688634', 'step': 13408, 'epoch': 2} {'type': 'loss', 'content': 0.08068358153104782, 'timestamp': '2025-10-01 04:29:31.690582', 'step': 13409, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:31.720507', 'step': 13409, 'epoch': 2} {'type': 'loss', 'content': 0.1507694125175476, 'timestamp': '2025-10-01 04:29:31.722553', 'step': 13410, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:29:31.752623', 'step': 13410, 'epoch': 2} {'type': 'loss', 'content': 0.08432535827159882, 'timestamp': '2025-10-01 04:29:31.754783', 'step': 13411, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:29:31.784762', 'step': 13411, 'epoch': 2} {'type': 'loss', 'content': 0.117198646068573, 'timestamp': '2025-10-01 04:29:31.808439', 'step': 13412, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:29:31.840184', 'step': 13412, 'epoch': 2} {'type': 'loss', 'content': 0.1192590743303299, 'timestamp': '2025-10-01 04:29:31.842730', 'step': 13413, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:31.872568', 'step': 13413, 'epoch': 2} {'type': 'loss', 'content': 0.07691027224063873, 'timestamp': '2025-10-01 04:29:31.874579', 'step': 13414, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:29:31.904935', 'step': 13414, 'epoch': 2} {'type': 'loss', 'content': 0.08967646956443787, 'timestamp': '2025-10-01 04:29:31.910442', 'step': 13415, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:31.943847', 'step': 13415, 'epoch': 2} {'type': 'loss', 'content': 0.06154749542474747, 'timestamp': '2025-10-01 04:29:31.967520', 'step': 13416, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:31.999867', 'step': 13416, 'epoch': 2} {'type': 'loss', 'content': 0.10297392308712006, 'timestamp': '2025-10-01 04:29:32.010827', 'step': 13417, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:32.047050', 'step': 13417, 'epoch': 2} {'type': 'loss', 'content': 0.09178292006254196, 'timestamp': '2025-10-01 04:29:32.049582', 'step': 13418, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:32.081745', 'step': 13418, 'epoch': 2} {'type': 'loss', 'content': 0.16431912779808044, 'timestamp': '2025-10-01 04:29:32.083643', 'step': 13419, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:32.114727', 'step': 13419, 'epoch': 2} {'type': 'loss', 'content': 0.05547983944416046, 'timestamp': '2025-10-01 04:29:32.140900', 'step': 13420, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:32.171621', 'step': 13420, 'epoch': 2} {'type': 'loss', 'content': 0.14373908936977386, 'timestamp': '2025-10-01 04:29:32.174081', 'step': 13421, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:29:32.206010', 'step': 13421, 'epoch': 2} {'type': 'loss', 'content': 0.0822645053267479, 'timestamp': '2025-10-01 04:29:32.209655', 'step': 13422, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:29:32.242910', 'step': 13422, 'epoch': 2} {'type': 'loss', 'content': 0.021532023325562477, 'timestamp': '2025-10-01 04:29:32.246230', 'step': 13423, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:32.278461', 'step': 13423, 'epoch': 2} {'type': 'loss', 'content': 0.046288877725601196, 'timestamp': '2025-10-01 04:29:32.310254', 'step': 13424, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:32.357385', 'step': 13424, 'epoch': 2} {'type': 'loss', 'content': 0.08451976627111435, 'timestamp': '2025-10-01 04:29:32.359498', 'step': 13425, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:32.399098', 'step': 13425, 'epoch': 2} {'type': 'loss', 'content': 0.07392273843288422, 'timestamp': '2025-10-01 04:29:32.401777', 'step': 13426, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:29:32.440997', 'step': 13426, 'epoch': 2} {'type': 'loss', 'content': 0.08985627442598343, 'timestamp': '2025-10-01 04:29:32.443190', 'step': 13427, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:29:32.481011', 'step': 13427, 'epoch': 2} {'type': 'loss', 'content': 0.09981092065572739, 'timestamp': '2025-10-01 04:29:32.504814', 'step': 13428, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:29:32.536949', 'step': 13428, 'epoch': 2} {'type': 'loss', 'content': 0.07864215224981308, 'timestamp': '2025-10-01 04:29:32.542961', 'step': 13429, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:32.574631', 'step': 13429, 'epoch': 2} {'type': 'loss', 'content': 0.09194615483283997, 'timestamp': '2025-10-01 04:29:32.576758', 'step': 13430, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:29:32.607393', 'step': 13430, 'epoch': 2} {'type': 'loss', 'content': 0.05006290599703789, 'timestamp': '2025-10-01 04:29:32.610173', 'step': 13431, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-10-01 04:29:32.642422', 'step': 13431, 'epoch': 2} {'type': 'loss', 'content': 0.06258581578731537, 'timestamp': '2025-10-01 04:29:32.670511', 'step': 13432, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:32.709690', 'step': 13432, 'epoch': 2} {'type': 'loss', 'content': 0.11768946796655655, 'timestamp': '2025-10-01 04:29:32.711721', 'step': 13433, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:32.744854', 'step': 13433, 'epoch': 2} {'type': 'loss', 'content': 0.062048740684986115, 'timestamp': '2025-10-01 04:29:32.747046', 'step': 13434, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:32.783403', 'step': 13434, 'epoch': 2} {'type': 'loss', 'content': 0.16109752655029297, 'timestamp': '2025-10-01 04:29:32.786068', 'step': 13435, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:32.816607', 'step': 13435, 'epoch': 2} {'type': 'loss', 'content': 0.1020597368478775, 'timestamp': '2025-10-01 04:29:32.840314', 'step': 13436, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:32.871809', 'step': 13436, 'epoch': 2} {'type': 'loss', 'content': 0.09548265486955643, 'timestamp': '2025-10-01 04:29:32.874469', 'step': 13437, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:32.905269', 'step': 13437, 'epoch': 2} {'type': 'loss', 'content': 0.11284935474395752, 'timestamp': '2025-10-01 04:29:32.907374', 'step': 13438, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:29:32.939203', 'step': 13438, 'epoch': 2} {'type': 'loss', 'content': 0.1579304337501526, 'timestamp': '2025-10-01 04:29:32.941241', 'step': 13439, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-10-01 04:29:32.972104', 'step': 13439, 'epoch': 2} {'type': 'loss', 'content': 0.10732147842645645, 'timestamp': '2025-10-01 04:29:32.997571', 'step': 13440, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:29:33.028212', 'step': 13440, 'epoch': 2} {'type': 'loss', 'content': 0.10625980794429779, 'timestamp': '2025-10-01 04:29:33.030278', 'step': 13441, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:33.062452', 'step': 13441, 'epoch': 2} {'type': 'loss', 'content': 0.12234492599964142, 'timestamp': '2025-10-01 04:29:33.077206', 'step': 13442, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:33.111060', 'step': 13442, 'epoch': 2} {'type': 'loss', 'content': 0.11649633944034576, 'timestamp': '2025-10-01 04:29:33.113394', 'step': 13443, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:29:33.146610', 'step': 13443, 'epoch': 2} {'type': 'loss', 'content': 0.17127229273319244, 'timestamp': '2025-10-01 04:29:33.170297', 'step': 13444, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:33.200765', 'step': 13444, 'epoch': 2} {'type': 'loss', 'content': 0.09413138031959534, 'timestamp': '2025-10-01 04:29:33.202903', 'step': 13445, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:33.233527', 'step': 13445, 'epoch': 2} {'type': 'loss', 'content': 0.06866533309221268, 'timestamp': '2025-10-01 04:29:33.235888', 'step': 13446, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:33.266162', 'step': 13446, 'epoch': 2} {'type': 'loss', 'content': 0.07361608743667603, 'timestamp': '2025-10-01 04:29:33.268219', 'step': 13447, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:29:33.299316', 'step': 13447, 'epoch': 2} {'type': 'loss', 'content': 0.11150691658258438, 'timestamp': '2025-10-01 04:29:33.323661', 'step': 13448, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:29:33.363908', 'step': 13448, 'epoch': 2} {'type': 'loss', 'content': 0.08801566809415817, 'timestamp': '2025-10-01 04:29:33.367175', 'step': 13449, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:33.412505', 'step': 13449, 'epoch': 2} {'type': 'loss', 'content': 0.1443074345588684, 'timestamp': '2025-10-01 04:29:33.414654', 'step': 13450, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:33.447277', 'step': 13450, 'epoch': 2} {'type': 'loss', 'content': 0.09818404167890549, 'timestamp': '2025-10-01 04:29:33.455810', 'step': 13451, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:33.488297', 'step': 13451, 'epoch': 2} {'type': 'loss', 'content': 0.056177712976932526, 'timestamp': '2025-10-01 04:29:33.511999', 'step': 13452, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:33.544311', 'step': 13452, 'epoch': 2} {'type': 'loss', 'content': 0.07995497435331345, 'timestamp': '2025-10-01 04:29:33.547334', 'step': 13453, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:33.578918', 'step': 13453, 'epoch': 2} {'type': 'loss', 'content': 0.14180289208889008, 'timestamp': '2025-10-01 04:29:33.581189', 'step': 13454, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:29:33.614220', 'step': 13454, 'epoch': 2} {'type': 'loss', 'content': 0.18268217146396637, 'timestamp': '2025-10-01 04:29:33.616251', 'step': 13455, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:29:33.649248', 'step': 13455, 'epoch': 2} {'type': 'loss', 'content': 0.0895276591181755, 'timestamp': '2025-10-01 04:29:33.673672', 'step': 13456, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:33.708979', 'step': 13456, 'epoch': 2} {'type': 'loss', 'content': 0.10837690532207489, 'timestamp': '2025-10-01 04:29:33.711299', 'step': 13457, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:33.743932', 'step': 13457, 'epoch': 2} {'type': 'loss', 'content': 0.10838307440280914, 'timestamp': '2025-10-01 04:29:33.746906', 'step': 13458, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:29:33.780058', 'step': 13458, 'epoch': 2} {'type': 'loss', 'content': 0.13760142028331757, 'timestamp': '2025-10-01 04:29:33.782245', 'step': 13459, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:29:33.814910', 'step': 13459, 'epoch': 2} {'type': 'loss', 'content': 0.17954696714878082, 'timestamp': '2025-10-01 04:29:33.838842', 'step': 13460, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:33.883956', 'step': 13460, 'epoch': 2} {'type': 'loss', 'content': 0.12313390523195267, 'timestamp': '2025-10-01 04:29:33.886157', 'step': 13461, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:33.918898', 'step': 13461, 'epoch': 2} {'type': 'loss', 'content': 0.1125974953174591, 'timestamp': '2025-10-01 04:29:33.921494', 'step': 13462, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:33.953585', 'step': 13462, 'epoch': 2} {'type': 'loss', 'content': 0.13285066187381744, 'timestamp': '2025-10-01 04:29:33.955746', 'step': 13463, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:33.986888', 'step': 13463, 'epoch': 2} {'type': 'loss', 'content': 0.10359801352024078, 'timestamp': '2025-10-01 04:29:34.021288', 'step': 13464, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:34.052804', 'step': 13464, 'epoch': 2} {'type': 'loss', 'content': 0.11656699329614639, 'timestamp': '2025-10-01 04:29:34.068742', 'step': 13465, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:34.108957', 'step': 13465, 'epoch': 2} {'type': 'loss', 'content': 0.0823495164513588, 'timestamp': '2025-10-01 04:29:34.111793', 'step': 13466, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:34.143983', 'step': 13466, 'epoch': 2} {'type': 'loss', 'content': 0.21077534556388855, 'timestamp': '2025-10-01 04:29:34.146167', 'step': 13467, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:29:34.180158', 'step': 13467, 'epoch': 2} {'type': 'loss', 'content': 0.05613135173916817, 'timestamp': '2025-10-01 04:29:34.204328', 'step': 13468, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:34.245770', 'step': 13468, 'epoch': 2} {'type': 'loss', 'content': 0.06258419901132584, 'timestamp': '2025-10-01 04:29:34.247897', 'step': 13469, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:34.278926', 'step': 13469, 'epoch': 2} {'type': 'loss', 'content': 0.11737128347158432, 'timestamp': '2025-10-01 04:29:34.281030', 'step': 13470, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:34.315703', 'step': 13470, 'epoch': 2} {'type': 'loss', 'content': 0.15961551666259766, 'timestamp': '2025-10-01 04:29:34.317769', 'step': 13471, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:29:34.349461', 'step': 13471, 'epoch': 2} {'type': 'loss', 'content': 0.04227184131741524, 'timestamp': '2025-10-01 04:29:34.373118', 'step': 13472, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:29:34.404700', 'step': 13472, 'epoch': 2} {'type': 'loss', 'content': 0.1364039182662964, 'timestamp': '2025-10-01 04:29:34.407193', 'step': 13473, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:34.443126', 'step': 13473, 'epoch': 2} {'type': 'loss', 'content': 0.1428779661655426, 'timestamp': '2025-10-01 04:29:34.445819', 'step': 13474, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:29:34.480263', 'step': 13474, 'epoch': 2} {'type': 'loss', 'content': 0.053689248859882355, 'timestamp': '2025-10-01 04:29:34.492283', 'step': 13475, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:34.531601', 'step': 13475, 'epoch': 2} {'type': 'loss', 'content': 0.11966311931610107, 'timestamp': '2025-10-01 04:29:34.555432', 'step': 13476, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:34.587365', 'step': 13476, 'epoch': 2} {'type': 'loss', 'content': 0.10510312765836716, 'timestamp': '2025-10-01 04:29:34.589782', 'step': 13477, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:34.628049', 'step': 13477, 'epoch': 2} {'type': 'loss', 'content': 0.049630988389253616, 'timestamp': '2025-10-01 04:29:34.630328', 'step': 13478, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:34.682458', 'step': 13478, 'epoch': 2} {'type': 'loss', 'content': 0.05964149534702301, 'timestamp': '2025-10-01 04:29:34.685212', 'step': 13479, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:34.715746', 'step': 13479, 'epoch': 2} {'type': 'loss', 'content': 0.15488997101783752, 'timestamp': '2025-10-01 04:29:34.739417', 'step': 13480, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:34.771517', 'step': 13480, 'epoch': 2} {'type': 'loss', 'content': 0.11912913620471954, 'timestamp': '2025-10-01 04:29:34.773856', 'step': 13481, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:34.806054', 'step': 13481, 'epoch': 2} {'type': 'loss', 'content': 0.07886561006307602, 'timestamp': '2025-10-01 04:29:34.808587', 'step': 13482, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:34.839687', 'step': 13482, 'epoch': 2} {'type': 'loss', 'content': 0.07148654758930206, 'timestamp': '2025-10-01 04:29:34.842019', 'step': 13483, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:34.872834', 'step': 13483, 'epoch': 2} {'type': 'loss', 'content': 0.12045446783304214, 'timestamp': '2025-10-01 04:29:34.911949', 'step': 13484, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:34.943895', 'step': 13484, 'epoch': 2} {'type': 'loss', 'content': 0.06864225119352341, 'timestamp': '2025-10-01 04:29:34.946124', 'step': 13485, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:34.978551', 'step': 13485, 'epoch': 2} {'type': 'loss', 'content': 0.10915849357843399, 'timestamp': '2025-10-01 04:29:34.980922', 'step': 13486, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:35.015802', 'step': 13486, 'epoch': 2} {'type': 'loss', 'content': 0.05680280551314354, 'timestamp': '2025-10-01 04:29:35.019682', 'step': 13487, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:35.050637', 'step': 13487, 'epoch': 2} {'type': 'loss', 'content': 0.11981213837862015, 'timestamp': '2025-10-01 04:29:35.076548', 'step': 13488, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:35.106629', 'step': 13488, 'epoch': 2} {'type': 'loss', 'content': 0.13280576467514038, 'timestamp': '2025-10-01 04:29:35.113696', 'step': 13489, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:35.143782', 'step': 13489, 'epoch': 2} {'type': 'loss', 'content': 0.11949799954891205, 'timestamp': '2025-10-01 04:29:35.146054', 'step': 13490, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:35.177079', 'step': 13490, 'epoch': 2} {'type': 'loss', 'content': 0.07627003639936447, 'timestamp': '2025-10-01 04:29:35.181210', 'step': 13491, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:29:35.212691', 'step': 13491, 'epoch': 2} {'type': 'loss', 'content': 0.08527565002441406, 'timestamp': '2025-10-01 04:29:35.236714', 'step': 13492, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:35.273241', 'step': 13492, 'epoch': 2} {'type': 'loss', 'content': 0.0905149057507515, 'timestamp': '2025-10-01 04:29:35.277731', 'step': 13493, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:35.308509', 'step': 13493, 'epoch': 2} {'type': 'loss', 'content': 0.1295899599790573, 'timestamp': '2025-10-01 04:29:35.310725', 'step': 13494, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:29:35.341715', 'step': 13494, 'epoch': 2} {'type': 'loss', 'content': 0.16858328878879547, 'timestamp': '2025-10-01 04:29:35.343906', 'step': 13495, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:35.387177', 'step': 13495, 'epoch': 2} {'type': 'loss', 'content': 0.20071615278720856, 'timestamp': '2025-10-01 04:29:35.410933', 'step': 13496, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:35.466589', 'step': 13496, 'epoch': 2} {'type': 'loss', 'content': 0.08562647551298141, 'timestamp': '2025-10-01 04:29:35.469599', 'step': 13497, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:35.501947', 'step': 13497, 'epoch': 2} {'type': 'loss', 'content': 0.09405408799648285, 'timestamp': '2025-10-01 04:29:35.504598', 'step': 13498, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:35.539422', 'step': 13498, 'epoch': 2} {'type': 'loss', 'content': 0.11019615083932877, 'timestamp': '2025-10-01 04:29:35.541447', 'step': 13499, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:35.571869', 'step': 13499, 'epoch': 2} {'type': 'loss', 'content': 0.08497321605682373, 'timestamp': '2025-10-01 04:29:35.595465', 'step': 13500, 'epoch': 2} {'type': 'info', 'content': 'Checkpoint saved at step 13500', 'timestamp': '2025-10-01 04:29:40.485144', 'step': 13500, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:29:40.526829', 'step': 13500, 'epoch': 2} {'type': 'loss', 'content': 0.16184556484222412, 'timestamp': '2025-10-01 04:29:40.528975', 'step': 13501, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:40.560397', 'step': 13501, 'epoch': 2} {'type': 'loss', 'content': 0.07435809075832367, 'timestamp': '2025-10-01 04:29:40.562448', 'step': 13502, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:40.593131', 'step': 13502, 'epoch': 2} {'type': 'loss', 'content': 0.12431108951568604, 'timestamp': '2025-10-01 04:29:40.595703', 'step': 13503, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:40.630280', 'step': 13503, 'epoch': 2} {'type': 'loss', 'content': 0.06299378722906113, 'timestamp': '2025-10-01 04:29:40.653964', 'step': 13504, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:40.690864', 'step': 13504, 'epoch': 2} {'type': 'loss', 'content': 0.11702468246221542, 'timestamp': '2025-10-01 04:29:40.700277', 'step': 13505, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:29:40.739904', 'step': 13505, 'epoch': 2} {'type': 'loss', 'content': 0.1016615778207779, 'timestamp': '2025-10-01 04:29:40.741997', 'step': 13506, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:40.773468', 'step': 13506, 'epoch': 2} {'type': 'loss', 'content': 0.05879965424537659, 'timestamp': '2025-10-01 04:29:40.776301', 'step': 13507, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:40.807979', 'step': 13507, 'epoch': 2} {'type': 'loss', 'content': 0.04751341789960861, 'timestamp': '2025-10-01 04:29:40.833033', 'step': 13508, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:40.864934', 'step': 13508, 'epoch': 2} {'type': 'loss', 'content': 0.08946079015731812, 'timestamp': '2025-10-01 04:29:40.866949', 'step': 13509, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:29:40.899109', 'step': 13509, 'epoch': 2} {'type': 'loss', 'content': 0.09177427738904953, 'timestamp': '2025-10-01 04:29:40.901379', 'step': 13510, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:29:40.932242', 'step': 13510, 'epoch': 2} {'type': 'loss', 'content': 0.1741233468055725, 'timestamp': '2025-10-01 04:29:40.935095', 'step': 13511, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:40.965775', 'step': 13511, 'epoch': 2} {'type': 'loss', 'content': 0.07297533750534058, 'timestamp': '2025-10-01 04:29:40.989419', 'step': 13512, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:29:41.020195', 'step': 13512, 'epoch': 2} {'type': 'loss', 'content': 0.0625167116522789, 'timestamp': '2025-10-01 04:29:41.035533', 'step': 13513, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:41.067205', 'step': 13513, 'epoch': 2} {'type': 'loss', 'content': 0.04516815394163132, 'timestamp': '2025-10-01 04:29:41.069356', 'step': 13514, 'epoch': 2} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 949202279808}], 'timestamp': '2025-10-01 04:29:49.934196', 'step': 13514, 'epoch': 2} {'type': 'pplx', 'content': 10945.211546649552, 'timestamp': '2025-10-01 04:29:49.941265', 'step': 13514, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:29:49.972107', 'step': 13514, 'epoch': 2} {'type': 'loss', 'content': 0.14995144307613373, 'timestamp': '2025-10-01 04:29:49.974912', 'step': 13515, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:50.015534', 'step': 13515, 'epoch': 2} {'type': 'loss', 'content': 0.10723205655813217, 'timestamp': '2025-10-01 04:29:50.041936', 'step': 13516, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:50.096224', 'step': 13516, 'epoch': 2} {'type': 'loss', 'content': 0.06857327371835709, 'timestamp': '2025-10-01 04:29:50.098761', 'step': 13517, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:50.133406', 'step': 13517, 'epoch': 2} {'type': 'loss', 'content': 0.11023984849452972, 'timestamp': '2025-10-01 04:29:50.135938', 'step': 13518, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:29:50.168222', 'step': 13518, 'epoch': 2} {'type': 'loss', 'content': 0.22304072976112366, 'timestamp': '2025-10-01 04:29:50.171217', 'step': 13519, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:50.203069', 'step': 13519, 'epoch': 2} {'type': 'loss', 'content': 0.11157230287790298, 'timestamp': '2025-10-01 04:29:50.227099', 'step': 13520, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:29:50.261918', 'step': 13520, 'epoch': 2} {'type': 'loss', 'content': 0.15902432799339294, 'timestamp': '2025-10-01 04:29:50.265709', 'step': 13521, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:29:50.304256', 'step': 13521, 'epoch': 2} {'type': 'loss', 'content': 0.14110638201236725, 'timestamp': '2025-10-01 04:29:50.310418', 'step': 13522, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:29:50.346253', 'step': 13522, 'epoch': 2} {'type': 'loss', 'content': 0.034957077354192734, 'timestamp': '2025-10-01 04:29:50.355436', 'step': 13523, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:50.395797', 'step': 13523, 'epoch': 2} {'type': 'loss', 'content': 0.06447924673557281, 'timestamp': '2025-10-01 04:29:50.419999', 'step': 13524, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:29:50.458473', 'step': 13524, 'epoch': 2} {'type': 'loss', 'content': 0.15303149819374084, 'timestamp': '2025-10-01 04:29:50.460837', 'step': 13525, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:50.506698', 'step': 13525, 'epoch': 2} {'type': 'loss', 'content': 0.048141371458768845, 'timestamp': '2025-10-01 04:29:50.508842', 'step': 13526, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:50.542192', 'step': 13526, 'epoch': 2} {'type': 'loss', 'content': 0.1480318307876587, 'timestamp': '2025-10-01 04:29:50.544233', 'step': 13527, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:29:50.574345', 'step': 13527, 'epoch': 2} {'type': 'loss', 'content': 0.10073868185281754, 'timestamp': '2025-10-01 04:29:50.598136', 'step': 13528, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:29:50.630391', 'step': 13528, 'epoch': 2} {'type': 'loss', 'content': 0.11861666291952133, 'timestamp': '2025-10-01 04:29:50.633059', 'step': 13529, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:29:50.665875', 'step': 13529, 'epoch': 2} {'type': 'loss', 'content': 0.07455576211214066, 'timestamp': '2025-10-01 04:29:50.668363', 'step': 13530, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:29:50.701849', 'step': 13530, 'epoch': 2} {'type': 'loss', 'content': 0.1474761664867401, 'timestamp': '2025-10-01 04:29:50.704066', 'step': 13531, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:50.736005', 'step': 13531, 'epoch': 2} {'type': 'loss', 'content': 0.06126978620886803, 'timestamp': '2025-10-01 04:29:50.760254', 'step': 13532, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:50.794671', 'step': 13532, 'epoch': 2} {'type': 'loss', 'content': 0.09939634054899216, 'timestamp': '2025-10-01 04:29:50.797500', 'step': 13533, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:50.834084', 'step': 13533, 'epoch': 2} {'type': 'loss', 'content': 0.10552079230546951, 'timestamp': '2025-10-01 04:29:50.836324', 'step': 13534, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:50.869859', 'step': 13534, 'epoch': 2} {'type': 'loss', 'content': 0.10585635900497437, 'timestamp': '2025-10-01 04:29:50.883929', 'step': 13535, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:29:50.916815', 'step': 13535, 'epoch': 2} {'type': 'loss', 'content': 0.1322583258152008, 'timestamp': '2025-10-01 04:29:50.940774', 'step': 13536, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:50.973808', 'step': 13536, 'epoch': 2} {'type': 'loss', 'content': 0.07286861538887024, 'timestamp': '2025-10-01 04:29:50.976701', 'step': 13537, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:51.008658', 'step': 13537, 'epoch': 2} {'type': 'loss', 'content': 0.06531290709972382, 'timestamp': '2025-10-01 04:29:51.011148', 'step': 13538, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:51.043624', 'step': 13538, 'epoch': 2} {'type': 'loss', 'content': 0.14035648107528687, 'timestamp': '2025-10-01 04:29:51.046456', 'step': 13539, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:51.078893', 'step': 13539, 'epoch': 2} {'type': 'loss', 'content': 0.10993333160877228, 'timestamp': '2025-10-01 04:29:51.102987', 'step': 13540, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:29:51.136047', 'step': 13540, 'epoch': 2} {'type': 'loss', 'content': 0.19397270679473877, 'timestamp': '2025-10-01 04:29:51.138454', 'step': 13541, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:29:51.178759', 'step': 13541, 'epoch': 2} {'type': 'loss', 'content': 0.06998252123594284, 'timestamp': '2025-10-01 04:29:51.180893', 'step': 13542, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:29:51.220416', 'step': 13542, 'epoch': 2} {'type': 'loss', 'content': 0.12063267827033997, 'timestamp': '2025-10-01 04:29:51.222567', 'step': 13543, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:51.257278', 'step': 13543, 'epoch': 2} {'type': 'loss', 'content': 0.11302859336137772, 'timestamp': '2025-10-01 04:29:51.284109', 'step': 13544, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:29:51.315227', 'step': 13544, 'epoch': 2} {'type': 'loss', 'content': 0.12881910800933838, 'timestamp': '2025-10-01 04:29:51.317389', 'step': 13545, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:51.348731', 'step': 13545, 'epoch': 2} {'type': 'loss', 'content': 0.12780460715293884, 'timestamp': '2025-10-01 04:29:51.350897', 'step': 13546, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:29:51.383980', 'step': 13546, 'epoch': 2} {'type': 'loss', 'content': 0.19020810723304749, 'timestamp': '2025-10-01 04:29:51.386089', 'step': 13547, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:51.419356', 'step': 13547, 'epoch': 2} {'type': 'loss', 'content': 0.18809102475643158, 'timestamp': '2025-10-01 04:29:51.456013', 'step': 13548, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:51.492899', 'step': 13548, 'epoch': 2} {'type': 'loss', 'content': 0.04109460860490799, 'timestamp': '2025-10-01 04:29:51.494933', 'step': 13549, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:51.528573', 'step': 13549, 'epoch': 2} {'type': 'loss', 'content': 0.09220331907272339, 'timestamp': '2025-10-01 04:29:51.530639', 'step': 13550, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:51.562564', 'step': 13550, 'epoch': 2} {'type': 'loss', 'content': 0.10615848004817963, 'timestamp': '2025-10-01 04:29:51.564763', 'step': 13551, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:29:51.596337', 'step': 13551, 'epoch': 2} {'type': 'loss', 'content': 0.12598355114459991, 'timestamp': '2025-10-01 04:29:51.620188', 'step': 13552, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:51.650699', 'step': 13552, 'epoch': 2} {'type': 'loss', 'content': 0.1025235652923584, 'timestamp': '2025-10-01 04:29:51.652634', 'step': 13553, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:51.683043', 'step': 13553, 'epoch': 2} {'type': 'loss', 'content': 0.13115112483501434, 'timestamp': '2025-10-01 04:29:51.685299', 'step': 13554, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:51.716485', 'step': 13554, 'epoch': 2} {'type': 'loss', 'content': 0.17325901985168457, 'timestamp': '2025-10-01 04:29:51.718879', 'step': 13555, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:51.749544', 'step': 13555, 'epoch': 2} {'type': 'loss', 'content': 0.15374207496643066, 'timestamp': '2025-10-01 04:29:51.772961', 'step': 13556, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:51.805165', 'step': 13556, 'epoch': 2} {'type': 'loss', 'content': 0.11476562172174454, 'timestamp': '2025-10-01 04:29:51.807467', 'step': 13557, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:51.853688', 'step': 13557, 'epoch': 2} {'type': 'loss', 'content': 0.10110226273536682, 'timestamp': '2025-10-01 04:29:51.855796', 'step': 13558, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:51.897922', 'step': 13558, 'epoch': 2} {'type': 'loss', 'content': 0.10619281232357025, 'timestamp': '2025-10-01 04:29:51.901245', 'step': 13559, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:29:51.931916', 'step': 13559, 'epoch': 2} {'type': 'loss', 'content': 0.08243659138679504, 'timestamp': '2025-10-01 04:29:51.955548', 'step': 13560, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:51.985661', 'step': 13560, 'epoch': 2} {'type': 'loss', 'content': 0.08601714670658112, 'timestamp': '2025-10-01 04:29:51.988438', 'step': 13561, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:52.018580', 'step': 13561, 'epoch': 2} {'type': 'loss', 'content': 0.05062848702073097, 'timestamp': '2025-10-01 04:29:52.020788', 'step': 13562, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:52.051098', 'step': 13562, 'epoch': 2} {'type': 'loss', 'content': 0.11238009482622147, 'timestamp': '2025-10-01 04:29:52.053339', 'step': 13563, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:29:52.084694', 'step': 13563, 'epoch': 2} {'type': 'loss', 'content': 0.10516397655010223, 'timestamp': '2025-10-01 04:29:52.109055', 'step': 13564, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:29:52.155507', 'step': 13564, 'epoch': 2} {'type': 'loss', 'content': 0.09905626624822617, 'timestamp': '2025-10-01 04:29:52.157773', 'step': 13565, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:29:52.189410', 'step': 13565, 'epoch': 2} {'type': 'loss', 'content': 0.09176818281412125, 'timestamp': '2025-10-01 04:29:52.192094', 'step': 13566, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:29:52.223086', 'step': 13566, 'epoch': 2} {'type': 'loss', 'content': 0.1639460325241089, 'timestamp': '2025-10-01 04:29:52.226462', 'step': 13567, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:52.258687', 'step': 13567, 'epoch': 2} {'type': 'loss', 'content': 0.1389247179031372, 'timestamp': '2025-10-01 04:29:52.282458', 'step': 13568, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:29:52.312997', 'step': 13568, 'epoch': 2} {'type': 'loss', 'content': 0.13286292552947998, 'timestamp': '2025-10-01 04:29:52.315318', 'step': 13569, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:52.345873', 'step': 13569, 'epoch': 2} {'type': 'loss', 'content': 0.0909801498055458, 'timestamp': '2025-10-01 04:29:52.348030', 'step': 13570, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:52.378616', 'step': 13570, 'epoch': 2} {'type': 'loss', 'content': 0.12231642007827759, 'timestamp': '2025-10-01 04:29:52.380905', 'step': 13571, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:52.411405', 'step': 13571, 'epoch': 2} {'type': 'loss', 'content': 0.12229868769645691, 'timestamp': '2025-10-01 04:29:52.436110', 'step': 13572, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:52.466276', 'step': 13572, 'epoch': 2} {'type': 'loss', 'content': 0.10833989828824997, 'timestamp': '2025-10-01 04:29:52.468442', 'step': 13573, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:52.499002', 'step': 13573, 'epoch': 2} {'type': 'loss', 'content': 0.11561105400323868, 'timestamp': '2025-10-01 04:29:52.501227', 'step': 13574, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:29:52.532309', 'step': 13574, 'epoch': 2} {'type': 'loss', 'content': 0.08118610084056854, 'timestamp': '2025-10-01 04:29:52.534544', 'step': 13575, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:52.569866', 'step': 13575, 'epoch': 2} {'type': 'loss', 'content': 0.11996346712112427, 'timestamp': '2025-10-01 04:29:52.593599', 'step': 13576, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:29:52.624132', 'step': 13576, 'epoch': 2} {'type': 'loss', 'content': 0.10311651229858398, 'timestamp': '2025-10-01 04:29:52.626393', 'step': 13577, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:29:52.656861', 'step': 13577, 'epoch': 2} {'type': 'loss', 'content': 0.09120048582553864, 'timestamp': '2025-10-01 04:29:52.659486', 'step': 13578, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:29:52.692050', 'step': 13578, 'epoch': 2} {'type': 'loss', 'content': 0.06253770738840103, 'timestamp': '2025-10-01 04:29:52.694446', 'step': 13579, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:29:52.726855', 'step': 13579, 'epoch': 2} {'type': 'loss', 'content': 0.14318698644638062, 'timestamp': '2025-10-01 04:29:52.750634', 'step': 13580, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:29:52.782686', 'step': 13580, 'epoch': 2} {'type': 'loss', 'content': 0.0717640072107315, 'timestamp': '2025-10-01 04:29:52.784859', 'step': 13581, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:52.816097', 'step': 13581, 'epoch': 2} {'type': 'loss', 'content': 0.1254524290561676, 'timestamp': '2025-10-01 04:29:52.818021', 'step': 13582, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:52.852804', 'step': 13582, 'epoch': 2} {'type': 'loss', 'content': 0.1208062469959259, 'timestamp': '2025-10-01 04:29:52.855303', 'step': 13583, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:52.885396', 'step': 13583, 'epoch': 2} {'type': 'loss', 'content': 0.07575755566358566, 'timestamp': '2025-10-01 04:29:52.910808', 'step': 13584, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:52.941904', 'step': 13584, 'epoch': 2} {'type': 'loss', 'content': 0.12622161209583282, 'timestamp': '2025-10-01 04:29:52.945763', 'step': 13585, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:52.976245', 'step': 13585, 'epoch': 2} {'type': 'loss', 'content': 0.14005105197429657, 'timestamp': '2025-10-01 04:29:52.978580', 'step': 13586, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:29:53.010079', 'step': 13586, 'epoch': 2} {'type': 'loss', 'content': 0.05802701786160469, 'timestamp': '2025-10-01 04:29:53.012774', 'step': 13587, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:29:53.044201', 'step': 13587, 'epoch': 2} {'type': 'loss', 'content': 0.15778584778308868, 'timestamp': '2025-10-01 04:29:53.067899', 'step': 13588, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:53.099330', 'step': 13588, 'epoch': 2} {'type': 'loss', 'content': 0.07111025601625443, 'timestamp': '2025-10-01 04:29:53.101462', 'step': 13589, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:53.131605', 'step': 13589, 'epoch': 2} {'type': 'loss', 'content': 0.0859721451997757, 'timestamp': '2025-10-01 04:29:53.133756', 'step': 13590, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:53.164289', 'step': 13590, 'epoch': 2} {'type': 'loss', 'content': 0.09135391563177109, 'timestamp': '2025-10-01 04:29:53.166432', 'step': 13591, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:53.207263', 'step': 13591, 'epoch': 2} {'type': 'loss', 'content': 0.1932813674211502, 'timestamp': '2025-10-01 04:29:53.230830', 'step': 13592, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:29:53.273899', 'step': 13592, 'epoch': 2} {'type': 'loss', 'content': 0.13962170481681824, 'timestamp': '2025-10-01 04:29:53.276151', 'step': 13593, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:29:53.307963', 'step': 13593, 'epoch': 2} {'type': 'loss', 'content': 0.07702924311161041, 'timestamp': '2025-10-01 04:29:53.310217', 'step': 13594, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:53.354509', 'step': 13594, 'epoch': 2} {'type': 'loss', 'content': 0.15116067230701447, 'timestamp': '2025-10-01 04:29:53.356743', 'step': 13595, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:29:53.387143', 'step': 13595, 'epoch': 2} {'type': 'loss', 'content': 0.08633974194526672, 'timestamp': '2025-10-01 04:29:53.410805', 'step': 13596, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:29:53.440683', 'step': 13596, 'epoch': 2} {'type': 'loss', 'content': 0.11391506344079971, 'timestamp': '2025-10-01 04:29:53.442999', 'step': 13597, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:53.474207', 'step': 13597, 'epoch': 2} {'type': 'loss', 'content': 0.20294877886772156, 'timestamp': '2025-10-01 04:29:53.476430', 'step': 13598, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:53.508746', 'step': 13598, 'epoch': 2} {'type': 'loss', 'content': 0.17161040008068085, 'timestamp': '2025-10-01 04:29:53.510730', 'step': 13599, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:53.555167', 'step': 13599, 'epoch': 2} {'type': 'loss', 'content': 0.07461395114660263, 'timestamp': '2025-10-01 04:29:53.578755', 'step': 13600, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:53.608967', 'step': 13600, 'epoch': 2} {'type': 'loss', 'content': 0.057527489960193634, 'timestamp': '2025-10-01 04:29:53.611053', 'step': 13601, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:53.642150', 'step': 13601, 'epoch': 2} {'type': 'loss', 'content': 0.143026664853096, 'timestamp': '2025-10-01 04:29:53.644088', 'step': 13602, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:53.675852', 'step': 13602, 'epoch': 2} {'type': 'loss', 'content': 0.08368544280529022, 'timestamp': '2025-10-01 04:29:53.678038', 'step': 13603, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:53.708559', 'step': 13603, 'epoch': 2} {'type': 'loss', 'content': 0.07694000005722046, 'timestamp': '2025-10-01 04:29:53.732228', 'step': 13604, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:53.763144', 'step': 13604, 'epoch': 2} {'type': 'loss', 'content': 0.1733812540769577, 'timestamp': '2025-10-01 04:29:53.766606', 'step': 13605, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:53.796966', 'step': 13605, 'epoch': 2} {'type': 'loss', 'content': 0.08088327199220657, 'timestamp': '2025-10-01 04:29:53.799217', 'step': 13606, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:53.829873', 'step': 13606, 'epoch': 2} {'type': 'loss', 'content': 0.1192893460392952, 'timestamp': '2025-10-01 04:29:53.832085', 'step': 13607, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:53.862774', 'step': 13607, 'epoch': 2} {'type': 'loss', 'content': 0.09086121618747711, 'timestamp': '2025-10-01 04:29:53.886515', 'step': 13608, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:53.917337', 'step': 13608, 'epoch': 2} {'type': 'loss', 'content': 0.10666649043560028, 'timestamp': '2025-10-01 04:29:53.919499', 'step': 13609, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:29:53.950081', 'step': 13609, 'epoch': 2} {'type': 'loss', 'content': 0.1213739812374115, 'timestamp': '2025-10-01 04:29:53.952303', 'step': 13610, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:53.983696', 'step': 13610, 'epoch': 2} {'type': 'loss', 'content': 0.1048164963722229, 'timestamp': '2025-10-01 04:29:53.986041', 'step': 13611, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:29:54.016635', 'step': 13611, 'epoch': 2} {'type': 'loss', 'content': 0.07444924116134644, 'timestamp': '2025-10-01 04:29:54.040488', 'step': 13612, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:54.071221', 'step': 13612, 'epoch': 2} {'type': 'loss', 'content': 0.17060557007789612, 'timestamp': '2025-10-01 04:29:54.073467', 'step': 13613, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:29:54.104856', 'step': 13613, 'epoch': 2} {'type': 'loss', 'content': 0.0964466780424118, 'timestamp': '2025-10-01 04:29:54.115300', 'step': 13614, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:54.145709', 'step': 13614, 'epoch': 2} {'type': 'loss', 'content': 0.11024048924446106, 'timestamp': '2025-10-01 04:29:54.147973', 'step': 13615, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:54.179058', 'step': 13615, 'epoch': 2} {'type': 'loss', 'content': 0.16999535262584686, 'timestamp': '2025-10-01 04:29:54.202951', 'step': 13616, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:29:54.233182', 'step': 13616, 'epoch': 2} {'type': 'loss', 'content': 0.10640296339988708, 'timestamp': '2025-10-01 04:29:54.235866', 'step': 13617, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:29:54.273392', 'step': 13617, 'epoch': 2} {'type': 'loss', 'content': 0.22246727347373962, 'timestamp': '2025-10-01 04:29:54.275517', 'step': 13618, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:54.315562', 'step': 13618, 'epoch': 2} {'type': 'loss', 'content': 0.10127018392086029, 'timestamp': '2025-10-01 04:29:54.317835', 'step': 13619, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:54.347921', 'step': 13619, 'epoch': 2} {'type': 'loss', 'content': 0.056496188044548035, 'timestamp': '2025-10-01 04:29:54.371653', 'step': 13620, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:54.402105', 'step': 13620, 'epoch': 2} {'type': 'loss', 'content': 0.061902955174446106, 'timestamp': '2025-10-01 04:29:54.404158', 'step': 13621, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:54.434394', 'step': 13621, 'epoch': 2} {'type': 'loss', 'content': 0.12584370374679565, 'timestamp': '2025-10-01 04:29:54.436430', 'step': 13622, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:54.467137', 'step': 13622, 'epoch': 2} {'type': 'loss', 'content': 0.08073773235082626, 'timestamp': '2025-10-01 04:29:54.469429', 'step': 13623, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:54.501037', 'step': 13623, 'epoch': 2} {'type': 'loss', 'content': 0.10713609308004379, 'timestamp': '2025-10-01 04:29:54.524611', 'step': 13624, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:54.554938', 'step': 13624, 'epoch': 2} {'type': 'loss', 'content': 0.09150242060422897, 'timestamp': '2025-10-01 04:29:54.557097', 'step': 13625, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:54.587261', 'step': 13625, 'epoch': 2} {'type': 'loss', 'content': 0.06543776392936707, 'timestamp': '2025-10-01 04:29:54.589750', 'step': 13626, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:54.620222', 'step': 13626, 'epoch': 2} {'type': 'loss', 'content': 0.10443426668643951, 'timestamp': '2025-10-01 04:29:54.622573', 'step': 13627, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:29:54.655405', 'step': 13627, 'epoch': 2} {'type': 'loss', 'content': 0.06170997396111488, 'timestamp': '2025-10-01 04:29:54.678950', 'step': 13628, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:54.710143', 'step': 13628, 'epoch': 2} {'type': 'loss', 'content': 0.08755572140216827, 'timestamp': '2025-10-01 04:29:54.721620', 'step': 13629, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:54.765648', 'step': 13629, 'epoch': 2} {'type': 'loss', 'content': 0.07516398280858994, 'timestamp': '2025-10-01 04:29:54.768179', 'step': 13630, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:29:54.798948', 'step': 13630, 'epoch': 2} {'type': 'loss', 'content': 0.0946519747376442, 'timestamp': '2025-10-01 04:29:54.801035', 'step': 13631, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:54.833370', 'step': 13631, 'epoch': 2} {'type': 'loss', 'content': 0.0877709835767746, 'timestamp': '2025-10-01 04:29:54.856990', 'step': 13632, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:54.888342', 'step': 13632, 'epoch': 2} {'type': 'loss', 'content': 0.03050646372139454, 'timestamp': '2025-10-01 04:29:54.890443', 'step': 13633, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:54.925062', 'step': 13633, 'epoch': 2} {'type': 'loss', 'content': 0.15351779758930206, 'timestamp': '2025-10-01 04:29:54.927223', 'step': 13634, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:54.971640', 'step': 13634, 'epoch': 2} {'type': 'loss', 'content': 0.07343883812427521, 'timestamp': '2025-10-01 04:29:54.974323', 'step': 13635, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:55.006408', 'step': 13635, 'epoch': 2} {'type': 'loss', 'content': 0.09529728442430496, 'timestamp': '2025-10-01 04:29:55.043064', 'step': 13636, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:55.076131', 'step': 13636, 'epoch': 2} {'type': 'loss', 'content': 0.12433461844921112, 'timestamp': '2025-10-01 04:29:55.078302', 'step': 13637, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:55.119368', 'step': 13637, 'epoch': 2} {'type': 'loss', 'content': 0.1407402902841568, 'timestamp': '2025-10-01 04:29:55.121561', 'step': 13638, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:55.152792', 'step': 13638, 'epoch': 2} {'type': 'loss', 'content': 0.0724603608250618, 'timestamp': '2025-10-01 04:29:55.154920', 'step': 13639, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:55.186088', 'step': 13639, 'epoch': 2} {'type': 'loss', 'content': 0.2014503926038742, 'timestamp': '2025-10-01 04:29:55.216388', 'step': 13640, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:29:55.249167', 'step': 13640, 'epoch': 2} {'type': 'loss', 'content': 0.05657914653420448, 'timestamp': '2025-10-01 04:29:55.251450', 'step': 13641, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:55.283372', 'step': 13641, 'epoch': 2} {'type': 'loss', 'content': 0.09247474372386932, 'timestamp': '2025-10-01 04:29:55.286836', 'step': 13642, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:55.318340', 'step': 13642, 'epoch': 2} {'type': 'loss', 'content': 0.1518944650888443, 'timestamp': '2025-10-01 04:29:55.321663', 'step': 13643, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:55.353721', 'step': 13643, 'epoch': 2} {'type': 'loss', 'content': 0.11587381362915039, 'timestamp': '2025-10-01 04:29:55.377554', 'step': 13644, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:29:55.415865', 'step': 13644, 'epoch': 2} {'type': 'loss', 'content': 0.09617708623409271, 'timestamp': '2025-10-01 04:29:55.418028', 'step': 13645, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:29:55.458954', 'step': 13645, 'epoch': 2} {'type': 'loss', 'content': 0.06685288995504379, 'timestamp': '2025-10-01 04:29:55.461679', 'step': 13646, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:55.492648', 'step': 13646, 'epoch': 2} {'type': 'loss', 'content': 0.11960523575544357, 'timestamp': '2025-10-01 04:29:55.494733', 'step': 13647, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:55.526416', 'step': 13647, 'epoch': 2} {'type': 'loss', 'content': 0.06996916979551315, 'timestamp': '2025-10-01 04:29:55.550302', 'step': 13648, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:29:55.580838', 'step': 13648, 'epoch': 2} {'type': 'loss', 'content': 0.03318975120782852, 'timestamp': '2025-10-01 04:29:55.582980', 'step': 13649, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:55.613939', 'step': 13649, 'epoch': 2} {'type': 'loss', 'content': 0.1540929228067398, 'timestamp': '2025-10-01 04:29:55.616104', 'step': 13650, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:55.646646', 'step': 13650, 'epoch': 2} {'type': 'loss', 'content': 0.05481602996587753, 'timestamp': '2025-10-01 04:29:55.648854', 'step': 13651, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:29:55.679626', 'step': 13651, 'epoch': 2} {'type': 'loss', 'content': 0.17832739651203156, 'timestamp': '2025-10-01 04:29:55.703310', 'step': 13652, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:29:55.735091', 'step': 13652, 'epoch': 2} {'type': 'loss', 'content': 0.14542187750339508, 'timestamp': '2025-10-01 04:29:55.737276', 'step': 13653, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:55.770300', 'step': 13653, 'epoch': 2} {'type': 'loss', 'content': 0.1745123267173767, 'timestamp': '2025-10-01 04:29:55.780985', 'step': 13654, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:29:55.811198', 'step': 13654, 'epoch': 2} {'type': 'loss', 'content': 0.1188155934214592, 'timestamp': '2025-10-01 04:29:55.820079', 'step': 13655, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:55.851242', 'step': 13655, 'epoch': 2} {'type': 'loss', 'content': 0.0748773068189621, 'timestamp': '2025-10-01 04:29:55.874930', 'step': 13656, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:55.905746', 'step': 13656, 'epoch': 2} {'type': 'loss', 'content': 0.13574428856372833, 'timestamp': '2025-10-01 04:29:55.907878', 'step': 13657, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:29:55.938225', 'step': 13657, 'epoch': 2} {'type': 'loss', 'content': 0.07140368968248367, 'timestamp': '2025-10-01 04:29:55.940664', 'step': 13658, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:55.970926', 'step': 13658, 'epoch': 2} {'type': 'loss', 'content': 0.06303443759679794, 'timestamp': '2025-10-01 04:29:55.973231', 'step': 13659, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:29:56.003908', 'step': 13659, 'epoch': 2} {'type': 'loss', 'content': 0.08565723896026611, 'timestamp': '2025-10-01 04:29:56.028806', 'step': 13660, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:56.059618', 'step': 13660, 'epoch': 2} {'type': 'loss', 'content': 0.21179179847240448, 'timestamp': '2025-10-01 04:29:56.061859', 'step': 13661, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:29:56.092275', 'step': 13661, 'epoch': 2} {'type': 'loss', 'content': 0.09785974770784378, 'timestamp': '2025-10-01 04:29:56.094331', 'step': 13662, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:56.126132', 'step': 13662, 'epoch': 2} {'type': 'loss', 'content': 0.09430045634508133, 'timestamp': '2025-10-01 04:29:56.128257', 'step': 13663, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:56.159516', 'step': 13663, 'epoch': 2} {'type': 'loss', 'content': 0.11388886719942093, 'timestamp': '2025-10-01 04:29:56.183366', 'step': 13664, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:56.214224', 'step': 13664, 'epoch': 2} {'type': 'loss', 'content': 0.09378223121166229, 'timestamp': '2025-10-01 04:29:56.216299', 'step': 13665, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:29:56.248570', 'step': 13665, 'epoch': 2} {'type': 'loss', 'content': 0.2022991180419922, 'timestamp': '2025-10-01 04:29:56.250647', 'step': 13666, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:56.281092', 'step': 13666, 'epoch': 2} {'type': 'loss', 'content': 0.08770796656608582, 'timestamp': '2025-10-01 04:29:56.283178', 'step': 13667, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:29:56.315399', 'step': 13667, 'epoch': 2} {'type': 'loss', 'content': 0.09529486298561096, 'timestamp': '2025-10-01 04:29:56.339179', 'step': 13668, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:29:56.375817', 'step': 13668, 'epoch': 2} {'type': 'loss', 'content': 0.13769203424453735, 'timestamp': '2025-10-01 04:29:56.378252', 'step': 13669, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:29:56.412233', 'step': 13669, 'epoch': 2} {'type': 'loss', 'content': 0.13614891469478607, 'timestamp': '2025-10-01 04:29:56.414464', 'step': 13670, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:56.446309', 'step': 13670, 'epoch': 2} {'type': 'loss', 'content': 0.1369825005531311, 'timestamp': '2025-10-01 04:29:56.448406', 'step': 13671, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:29:56.480507', 'step': 13671, 'epoch': 2} {'type': 'loss', 'content': 0.11367349326610565, 'timestamp': '2025-10-01 04:29:56.504302', 'step': 13672, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:56.536133', 'step': 13672, 'epoch': 2} {'type': 'loss', 'content': 0.16219910979270935, 'timestamp': '2025-10-01 04:29:56.539113', 'step': 13673, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:29:56.585555', 'step': 13673, 'epoch': 2} {'type': 'loss', 'content': 0.11172597855329514, 'timestamp': '2025-10-01 04:29:56.587661', 'step': 13674, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:56.625943', 'step': 13674, 'epoch': 2} {'type': 'loss', 'content': 0.06312970817089081, 'timestamp': '2025-10-01 04:29:56.628089', 'step': 13675, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:56.659198', 'step': 13675, 'epoch': 2} {'type': 'loss', 'content': 0.07746610790491104, 'timestamp': '2025-10-01 04:29:56.683154', 'step': 13676, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:56.739473', 'step': 13676, 'epoch': 2} {'type': 'loss', 'content': 0.08272532373666763, 'timestamp': '2025-10-01 04:29:56.741831', 'step': 13677, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:29:56.776402', 'step': 13677, 'epoch': 2} {'type': 'loss', 'content': 0.18968191742897034, 'timestamp': '2025-10-01 04:29:56.795982', 'step': 13678, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:29:56.833282', 'step': 13678, 'epoch': 2} {'type': 'loss', 'content': 0.17601197957992554, 'timestamp': '2025-10-01 04:29:56.840320', 'step': 13679, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:29:56.871876', 'step': 13679, 'epoch': 2} {'type': 'loss', 'content': 0.12013853341341019, 'timestamp': '2025-10-01 04:29:56.895673', 'step': 13680, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:56.931791', 'step': 13680, 'epoch': 2} {'type': 'loss', 'content': 0.06896917521953583, 'timestamp': '2025-10-01 04:29:56.934906', 'step': 13681, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:29:56.966886', 'step': 13681, 'epoch': 2} {'type': 'loss', 'content': 0.08753866702318192, 'timestamp': '2025-10-01 04:29:56.969041', 'step': 13682, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:57.001047', 'step': 13682, 'epoch': 2} {'type': 'loss', 'content': 0.0597451850771904, 'timestamp': '2025-10-01 04:29:57.003190', 'step': 13683, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:57.035374', 'step': 13683, 'epoch': 2} {'type': 'loss', 'content': 0.09318628162145615, 'timestamp': '2025-10-01 04:29:57.059539', 'step': 13684, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:57.094847', 'step': 13684, 'epoch': 2} {'type': 'loss', 'content': 0.03467842936515808, 'timestamp': '2025-10-01 04:29:57.096885', 'step': 13685, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:57.128738', 'step': 13685, 'epoch': 2} {'type': 'loss', 'content': 0.09576237946748734, 'timestamp': '2025-10-01 04:29:57.131582', 'step': 13686, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:57.165580', 'step': 13686, 'epoch': 2} {'type': 'loss', 'content': 0.14222075045108795, 'timestamp': '2025-10-01 04:29:57.168266', 'step': 13687, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:57.201069', 'step': 13687, 'epoch': 2} {'type': 'loss', 'content': 0.18203136324882507, 'timestamp': '2025-10-01 04:29:57.234760', 'step': 13688, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:57.269989', 'step': 13688, 'epoch': 2} {'type': 'loss', 'content': 0.0659438893198967, 'timestamp': '2025-10-01 04:29:57.272225', 'step': 13689, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:57.313683', 'step': 13689, 'epoch': 2} {'type': 'loss', 'content': 0.1391938030719757, 'timestamp': '2025-10-01 04:29:57.315855', 'step': 13690, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:29:57.348635', 'step': 13690, 'epoch': 2} {'type': 'loss', 'content': 0.07262584567070007, 'timestamp': '2025-10-01 04:29:57.351291', 'step': 13691, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:57.384104', 'step': 13691, 'epoch': 2} {'type': 'loss', 'content': 0.1833743453025818, 'timestamp': '2025-10-01 04:29:57.408109', 'step': 13692, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:57.439702', 'step': 13692, 'epoch': 2} {'type': 'loss', 'content': 0.07476501166820526, 'timestamp': '2025-10-01 04:29:57.447224', 'step': 13693, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:57.478947', 'step': 13693, 'epoch': 2} {'type': 'loss', 'content': 0.12279737740755081, 'timestamp': '2025-10-01 04:29:57.481092', 'step': 13694, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:29:57.512610', 'step': 13694, 'epoch': 2} {'type': 'loss', 'content': 0.10327665507793427, 'timestamp': '2025-10-01 04:29:57.514958', 'step': 13695, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:57.546015', 'step': 13695, 'epoch': 2} {'type': 'loss', 'content': 0.09800659120082855, 'timestamp': '2025-10-01 04:29:57.569659', 'step': 13696, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:57.599687', 'step': 13696, 'epoch': 2} {'type': 'loss', 'content': 0.11007731407880783, 'timestamp': '2025-10-01 04:29:57.603091', 'step': 13697, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:29:57.635403', 'step': 13697, 'epoch': 2} {'type': 'loss', 'content': 0.07585972547531128, 'timestamp': '2025-10-01 04:29:57.637764', 'step': 13698, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:57.668678', 'step': 13698, 'epoch': 2} {'type': 'loss', 'content': 0.13943201303482056, 'timestamp': '2025-10-01 04:29:57.677369', 'step': 13699, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:57.718862', 'step': 13699, 'epoch': 2} {'type': 'loss', 'content': 0.13387681543827057, 'timestamp': '2025-10-01 04:29:57.743247', 'step': 13700, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:29:57.774776', 'step': 13700, 'epoch': 2} {'type': 'loss', 'content': 0.09365592151880264, 'timestamp': '2025-10-01 04:29:57.777079', 'step': 13701, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:57.807370', 'step': 13701, 'epoch': 2} {'type': 'loss', 'content': 0.18341505527496338, 'timestamp': '2025-10-01 04:29:57.810297', 'step': 13702, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:29:57.841864', 'step': 13702, 'epoch': 2} {'type': 'loss', 'content': 0.07919164001941681, 'timestamp': '2025-10-01 04:29:57.844434', 'step': 13703, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:29:57.883415', 'step': 13703, 'epoch': 2} {'type': 'loss', 'content': 0.09005352854728699, 'timestamp': '2025-10-01 04:29:57.920111', 'step': 13704, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:57.951247', 'step': 13704, 'epoch': 2} {'type': 'loss', 'content': 0.032767701894044876, 'timestamp': '2025-10-01 04:29:57.953834', 'step': 13705, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:29:57.984873', 'step': 13705, 'epoch': 2} {'type': 'loss', 'content': 0.0745420828461647, 'timestamp': '2025-10-01 04:29:57.987199', 'step': 13706, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:29:58.017814', 'step': 13706, 'epoch': 2} {'type': 'loss', 'content': 0.14435520768165588, 'timestamp': '2025-10-01 04:29:58.020906', 'step': 13707, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:58.051685', 'step': 13707, 'epoch': 2} {'type': 'loss', 'content': 0.05418587476015091, 'timestamp': '2025-10-01 04:29:58.075814', 'step': 13708, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:29:58.107345', 'step': 13708, 'epoch': 2} {'type': 'loss', 'content': 0.10502468794584274, 'timestamp': '2025-10-01 04:29:58.109918', 'step': 13709, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:58.141309', 'step': 13709, 'epoch': 2} {'type': 'loss', 'content': 0.1766480952501297, 'timestamp': '2025-10-01 04:29:58.144261', 'step': 13710, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:58.175834', 'step': 13710, 'epoch': 2} {'type': 'loss', 'content': 0.060129884630441666, 'timestamp': '2025-10-01 04:29:58.189148', 'step': 13711, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:29:58.221379', 'step': 13711, 'epoch': 2} {'type': 'loss', 'content': 0.06506690382957458, 'timestamp': '2025-10-01 04:29:58.245727', 'step': 13712, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:58.277405', 'step': 13712, 'epoch': 2} {'type': 'loss', 'content': 0.03525133803486824, 'timestamp': '2025-10-01 04:29:58.279770', 'step': 13713, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:58.310567', 'step': 13713, 'epoch': 2} {'type': 'loss', 'content': 0.18843859434127808, 'timestamp': '2025-10-01 04:29:58.319085', 'step': 13714, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:58.350780', 'step': 13714, 'epoch': 2} {'type': 'loss', 'content': 0.133822962641716, 'timestamp': '2025-10-01 04:29:58.353282', 'step': 13715, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:29:58.385727', 'step': 13715, 'epoch': 2} {'type': 'loss', 'content': 0.08920919895172119, 'timestamp': '2025-10-01 04:29:58.411038', 'step': 13716, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:29:58.443754', 'step': 13716, 'epoch': 2} {'type': 'loss', 'content': 0.08443091064691544, 'timestamp': '2025-10-01 04:29:58.446193', 'step': 13717, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:29:58.477481', 'step': 13717, 'epoch': 2} {'type': 'loss', 'content': 0.09933451563119888, 'timestamp': '2025-10-01 04:29:58.480002', 'step': 13718, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:29:58.511461', 'step': 13718, 'epoch': 2} {'type': 'loss', 'content': 0.13657711446285248, 'timestamp': '2025-10-01 04:29:58.515747', 'step': 13719, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:58.546989', 'step': 13719, 'epoch': 2} {'type': 'loss', 'content': 0.0899636447429657, 'timestamp': '2025-10-01 04:29:58.581868', 'step': 13720, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:58.612540', 'step': 13720, 'epoch': 2} {'type': 'loss', 'content': 0.0426529124379158, 'timestamp': '2025-10-01 04:29:58.614815', 'step': 13721, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:58.645832', 'step': 13721, 'epoch': 2} {'type': 'loss', 'content': 0.053624555468559265, 'timestamp': '2025-10-01 04:29:58.648412', 'step': 13722, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:58.679068', 'step': 13722, 'epoch': 2} {'type': 'loss', 'content': 0.06615980714559555, 'timestamp': '2025-10-01 04:29:58.681542', 'step': 13723, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:29:58.713268', 'step': 13723, 'epoch': 2} {'type': 'loss', 'content': 0.15882602334022522, 'timestamp': '2025-10-01 04:29:58.737098', 'step': 13724, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:58.768270', 'step': 13724, 'epoch': 2} {'type': 'loss', 'content': 0.11467179656028748, 'timestamp': '2025-10-01 04:29:58.770794', 'step': 13725, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:58.802348', 'step': 13725, 'epoch': 2} {'type': 'loss', 'content': 0.13175611197948456, 'timestamp': '2025-10-01 04:29:58.804860', 'step': 13726, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:58.842605', 'step': 13726, 'epoch': 2} {'type': 'loss', 'content': 0.08042098581790924, 'timestamp': '2025-10-01 04:29:58.845124', 'step': 13727, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:58.875659', 'step': 13727, 'epoch': 2} {'type': 'loss', 'content': 0.1143047884106636, 'timestamp': '2025-10-01 04:29:58.900195', 'step': 13728, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:58.932343', 'step': 13728, 'epoch': 2} {'type': 'loss', 'content': 0.12378619611263275, 'timestamp': '2025-10-01 04:29:58.934907', 'step': 13729, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:29:58.966631', 'step': 13729, 'epoch': 2} {'type': 'loss', 'content': 0.09479359537363052, 'timestamp': '2025-10-01 04:29:58.977330', 'step': 13730, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:29:59.008859', 'step': 13730, 'epoch': 2} {'type': 'loss', 'content': 0.12244603037834167, 'timestamp': '2025-10-01 04:29:59.011850', 'step': 13731, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:59.042454', 'step': 13731, 'epoch': 2} {'type': 'loss', 'content': 0.09456192702054977, 'timestamp': '2025-10-01 04:29:59.068196', 'step': 13732, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:59.099681', 'step': 13732, 'epoch': 2} {'type': 'loss', 'content': 0.0668661817908287, 'timestamp': '2025-10-01 04:29:59.102051', 'step': 13733, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:29:59.136060', 'step': 13733, 'epoch': 2} {'type': 'loss', 'content': 0.12815463542938232, 'timestamp': '2025-10-01 04:29:59.138574', 'step': 13734, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:29:59.179751', 'step': 13734, 'epoch': 2} {'type': 'loss', 'content': 0.09559814631938934, 'timestamp': '2025-10-01 04:29:59.181893', 'step': 13735, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:59.212772', 'step': 13735, 'epoch': 2} {'type': 'loss', 'content': 0.18531624972820282, 'timestamp': '2025-10-01 04:29:59.237361', 'step': 13736, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:59.268063', 'step': 13736, 'epoch': 2} {'type': 'loss', 'content': 0.049194879829883575, 'timestamp': '2025-10-01 04:29:59.270309', 'step': 13737, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:59.301219', 'step': 13737, 'epoch': 2} {'type': 'loss', 'content': 0.1607101708650589, 'timestamp': '2025-10-01 04:29:59.303338', 'step': 13738, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:59.333799', 'step': 13738, 'epoch': 2} {'type': 'loss', 'content': 0.1281702071428299, 'timestamp': '2025-10-01 04:29:59.336605', 'step': 13739, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:29:59.367066', 'step': 13739, 'epoch': 2} {'type': 'loss', 'content': 0.09956197440624237, 'timestamp': '2025-10-01 04:29:59.391025', 'step': 13740, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:59.421270', 'step': 13740, 'epoch': 2} {'type': 'loss', 'content': 0.13236004114151, 'timestamp': '2025-10-01 04:29:59.424142', 'step': 13741, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:59.455242', 'step': 13741, 'epoch': 2} {'type': 'loss', 'content': 0.04013870656490326, 'timestamp': '2025-10-01 04:29:59.457612', 'step': 13742, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:59.489464', 'step': 13742, 'epoch': 2} {'type': 'loss', 'content': 0.21227042376995087, 'timestamp': '2025-10-01 04:29:59.491850', 'step': 13743, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:29:59.525865', 'step': 13743, 'epoch': 2} {'type': 'loss', 'content': 0.09764841943979263, 'timestamp': '2025-10-01 04:29:59.549507', 'step': 13744, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:59.580818', 'step': 13744, 'epoch': 2} {'type': 'loss', 'content': 0.1364036649465561, 'timestamp': '2025-10-01 04:29:59.583051', 'step': 13745, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:29:59.614004', 'step': 13745, 'epoch': 2} {'type': 'loss', 'content': 0.09243402630090714, 'timestamp': '2025-10-01 04:29:59.616023', 'step': 13746, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:59.646480', 'step': 13746, 'epoch': 2} {'type': 'loss', 'content': 0.10691569745540619, 'timestamp': '2025-10-01 04:29:59.660509', 'step': 13747, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:29:59.690357', 'step': 13747, 'epoch': 2} {'type': 'loss', 'content': 0.14291338622570038, 'timestamp': '2025-10-01 04:29:59.714000', 'step': 13748, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:59.743566', 'step': 13748, 'epoch': 2} {'type': 'loss', 'content': 0.11121386289596558, 'timestamp': '2025-10-01 04:29:59.745736', 'step': 13749, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:59.776078', 'step': 13749, 'epoch': 2} {'type': 'loss', 'content': 0.12180168926715851, 'timestamp': '2025-10-01 04:29:59.778284', 'step': 13750, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:29:59.809596', 'step': 13750, 'epoch': 2} {'type': 'loss', 'content': 0.12959924340248108, 'timestamp': '2025-10-01 04:29:59.825571', 'step': 13751, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:59.864053', 'step': 13751, 'epoch': 2} {'type': 'loss', 'content': 0.1588340401649475, 'timestamp': '2025-10-01 04:29:59.887730', 'step': 13752, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:29:59.929531', 'step': 13752, 'epoch': 2} {'type': 'loss', 'content': 0.11328383535146713, 'timestamp': '2025-10-01 04:29:59.931697', 'step': 13753, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:29:59.964480', 'step': 13753, 'epoch': 2} {'type': 'loss', 'content': 0.22813336551189423, 'timestamp': '2025-10-01 04:29:59.966899', 'step': 13754, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:29:59.998113', 'step': 13754, 'epoch': 2} {'type': 'loss', 'content': 0.17013059556484222, 'timestamp': '2025-10-01 04:30:00.005742', 'step': 13755, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:30:00.036421', 'step': 13755, 'epoch': 2} {'type': 'loss', 'content': 0.0684235543012619, 'timestamp': '2025-10-01 04:30:00.060511', 'step': 13756, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:30:00.091064', 'step': 13756, 'epoch': 2} {'type': 'loss', 'content': 0.071609266102314, 'timestamp': '2025-10-01 04:30:00.093372', 'step': 13757, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:30:00.123198', 'step': 13757, 'epoch': 2} {'type': 'loss', 'content': 0.11827989667654037, 'timestamp': '2025-10-01 04:30:00.125380', 'step': 13758, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:30:00.155683', 'step': 13758, 'epoch': 2} {'type': 'loss', 'content': 0.10140476375818253, 'timestamp': '2025-10-01 04:30:00.166187', 'step': 13759, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:00.196859', 'step': 13759, 'epoch': 2} {'type': 'loss', 'content': 0.154950350522995, 'timestamp': '2025-10-01 04:30:00.227297', 'step': 13760, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:00.258608', 'step': 13760, 'epoch': 2} {'type': 'loss', 'content': 0.13276712596416473, 'timestamp': '2025-10-01 04:30:00.260791', 'step': 13761, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:00.297305', 'step': 13761, 'epoch': 2} {'type': 'loss', 'content': 0.0917857438325882, 'timestamp': '2025-10-01 04:30:00.307742', 'step': 13762, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:30:00.338584', 'step': 13762, 'epoch': 2} {'type': 'loss', 'content': 0.060122523456811905, 'timestamp': '2025-10-01 04:30:00.341007', 'step': 13763, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:30:00.372067', 'step': 13763, 'epoch': 2} {'type': 'loss', 'content': 0.10054294019937515, 'timestamp': '2025-10-01 04:30:00.395715', 'step': 13764, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:30:00.426848', 'step': 13764, 'epoch': 2} {'type': 'loss', 'content': 0.10600076615810394, 'timestamp': '2025-10-01 04:30:00.429039', 'step': 13765, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:00.460503', 'step': 13765, 'epoch': 2} {'type': 'loss', 'content': 0.09433813393115997, 'timestamp': '2025-10-01 04:30:00.462675', 'step': 13766, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:00.495244', 'step': 13766, 'epoch': 2} {'type': 'loss', 'content': 0.2076706886291504, 'timestamp': '2025-10-01 04:30:00.497745', 'step': 13767, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-10-01 04:30:00.529997', 'step': 13767, 'epoch': 2} {'type': 'loss', 'content': 0.05280106142163277, 'timestamp': '2025-10-01 04:30:00.555582', 'step': 13768, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:00.585759', 'step': 13768, 'epoch': 2} {'type': 'loss', 'content': 0.10595561563968658, 'timestamp': '2025-10-01 04:30:00.593536', 'step': 13769, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:30:00.633288', 'step': 13769, 'epoch': 2} {'type': 'loss', 'content': 0.08932604640722275, 'timestamp': '2025-10-01 04:30:00.636433', 'step': 13770, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:00.667047', 'step': 13770, 'epoch': 2} {'type': 'loss', 'content': 0.11036965996026993, 'timestamp': '2025-10-01 04:30:00.669527', 'step': 13771, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:00.699936', 'step': 13771, 'epoch': 2} {'type': 'loss', 'content': 0.018908051773905754, 'timestamp': '2025-10-01 04:30:00.730916', 'step': 13772, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:30:00.762291', 'step': 13772, 'epoch': 2} {'type': 'loss', 'content': 0.13523121178150177, 'timestamp': '2025-10-01 04:30:00.779368', 'step': 13773, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:30:00.818740', 'step': 13773, 'epoch': 2} {'type': 'loss', 'content': 0.08599473536014557, 'timestamp': '2025-10-01 04:30:00.822741', 'step': 13774, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:00.855212', 'step': 13774, 'epoch': 2} {'type': 'loss', 'content': 0.15272948145866394, 'timestamp': '2025-10-01 04:30:00.857533', 'step': 13775, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:00.889094', 'step': 13775, 'epoch': 2} {'type': 'loss', 'content': 0.17819459736347198, 'timestamp': '2025-10-01 04:30:00.912757', 'step': 13776, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:00.943411', 'step': 13776, 'epoch': 2} {'type': 'loss', 'content': 0.183561772108078, 'timestamp': '2025-10-01 04:30:00.945934', 'step': 13777, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:00.976455', 'step': 13777, 'epoch': 2} {'type': 'loss', 'content': 0.21881230175495148, 'timestamp': '2025-10-01 04:30:00.978759', 'step': 13778, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:01.009453', 'step': 13778, 'epoch': 2} {'type': 'loss', 'content': 0.1887415498495102, 'timestamp': '2025-10-01 04:30:01.011664', 'step': 13779, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:30:01.043090', 'step': 13779, 'epoch': 2} {'type': 'loss', 'content': 0.0944279208779335, 'timestamp': '2025-10-01 04:30:01.066635', 'step': 13780, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:01.096824', 'step': 13780, 'epoch': 2} {'type': 'loss', 'content': 0.12214502692222595, 'timestamp': '2025-10-01 04:30:01.098977', 'step': 13781, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:30:01.128833', 'step': 13781, 'epoch': 2} {'type': 'loss', 'content': 0.1792076826095581, 'timestamp': '2025-10-01 04:30:01.130812', 'step': 13782, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:30:01.161053', 'step': 13782, 'epoch': 2} {'type': 'loss', 'content': 0.09869541972875595, 'timestamp': '2025-10-01 04:30:01.163512', 'step': 13783, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:01.195038', 'step': 13783, 'epoch': 2} {'type': 'loss', 'content': 0.08056115359067917, 'timestamp': '2025-10-01 04:30:01.218819', 'step': 13784, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:01.249704', 'step': 13784, 'epoch': 2} {'type': 'loss', 'content': 0.09950772672891617, 'timestamp': '2025-10-01 04:30:01.251996', 'step': 13785, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:30:01.283176', 'step': 13785, 'epoch': 2} {'type': 'loss', 'content': 0.05233277007937431, 'timestamp': '2025-10-01 04:30:01.285622', 'step': 13786, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:30:01.316340', 'step': 13786, 'epoch': 2} {'type': 'loss', 'content': 0.04317297786474228, 'timestamp': '2025-10-01 04:30:01.319601', 'step': 13787, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:01.350315', 'step': 13787, 'epoch': 2} {'type': 'loss', 'content': 0.06850980967283249, 'timestamp': '2025-10-01 04:30:01.385573', 'step': 13788, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:30:01.416336', 'step': 13788, 'epoch': 2} {'type': 'loss', 'content': 0.14689703285694122, 'timestamp': '2025-10-01 04:30:01.418526', 'step': 13789, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:01.448999', 'step': 13789, 'epoch': 2} {'type': 'loss', 'content': 0.1242871806025505, 'timestamp': '2025-10-01 04:30:01.451533', 'step': 13790, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:01.482296', 'step': 13790, 'epoch': 2} {'type': 'loss', 'content': 0.12780824303627014, 'timestamp': '2025-10-01 04:30:01.498930', 'step': 13791, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:01.529336', 'step': 13791, 'epoch': 2} {'type': 'loss', 'content': 0.06531579792499542, 'timestamp': '2025-10-01 04:30:01.553088', 'step': 13792, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:30:01.583685', 'step': 13792, 'epoch': 2} {'type': 'loss', 'content': 0.059619203209877014, 'timestamp': '2025-10-01 04:30:01.586081', 'step': 13793, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:01.619256', 'step': 13793, 'epoch': 2} {'type': 'loss', 'content': 0.11104924231767654, 'timestamp': '2025-10-01 04:30:01.623945', 'step': 13794, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:01.655329', 'step': 13794, 'epoch': 2} {'type': 'loss', 'content': 0.09391389042139053, 'timestamp': '2025-10-01 04:30:01.657545', 'step': 13795, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:01.687899', 'step': 13795, 'epoch': 2} {'type': 'loss', 'content': 0.14335495233535767, 'timestamp': '2025-10-01 04:30:01.711314', 'step': 13796, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:30:01.742622', 'step': 13796, 'epoch': 2} {'type': 'loss', 'content': 0.07506855577230453, 'timestamp': '2025-10-01 04:30:01.757126', 'step': 13797, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:30:01.789247', 'step': 13797, 'epoch': 2} {'type': 'loss', 'content': 0.13728828728199005, 'timestamp': '2025-10-01 04:30:01.792263', 'step': 13798, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:30:01.822943', 'step': 13798, 'epoch': 2} {'type': 'loss', 'content': 0.11188767850399017, 'timestamp': '2025-10-01 04:30:01.840611', 'step': 13799, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:01.886692', 'step': 13799, 'epoch': 2} {'type': 'loss', 'content': 0.0880967229604721, 'timestamp': '2025-10-01 04:30:01.911553', 'step': 13800, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:01.942873', 'step': 13800, 'epoch': 2} {'type': 'loss', 'content': 0.11779405921697617, 'timestamp': '2025-10-01 04:30:01.944959', 'step': 13801, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:30:01.975468', 'step': 13801, 'epoch': 2} {'type': 'loss', 'content': 0.13092735409736633, 'timestamp': '2025-10-01 04:30:01.977651', 'step': 13802, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:02.016812', 'step': 13802, 'epoch': 2} {'type': 'loss', 'content': 0.18427252769470215, 'timestamp': '2025-10-01 04:30:02.027768', 'step': 13803, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:02.059016', 'step': 13803, 'epoch': 2} {'type': 'loss', 'content': 0.09648045152425766, 'timestamp': '2025-10-01 04:30:02.082815', 'step': 13804, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:30:02.112660', 'step': 13804, 'epoch': 2} {'type': 'loss', 'content': 0.16918860375881195, 'timestamp': '2025-10-01 04:30:02.115085', 'step': 13805, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:02.145670', 'step': 13805, 'epoch': 2} {'type': 'loss', 'content': 0.06195031479001045, 'timestamp': '2025-10-01 04:30:02.147888', 'step': 13806, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:02.179479', 'step': 13806, 'epoch': 2} {'type': 'loss', 'content': 0.08893156796693802, 'timestamp': '2025-10-01 04:30:02.181764', 'step': 13807, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:30:02.212316', 'step': 13807, 'epoch': 2} {'type': 'loss', 'content': 0.07294676452875137, 'timestamp': '2025-10-01 04:30:02.236131', 'step': 13808, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:02.267248', 'step': 13808, 'epoch': 2} {'type': 'loss', 'content': 0.08096672594547272, 'timestamp': '2025-10-01 04:30:02.269726', 'step': 13809, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:02.300425', 'step': 13809, 'epoch': 2} {'type': 'loss', 'content': 0.12829673290252686, 'timestamp': '2025-10-01 04:30:02.302661', 'step': 13810, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:30:02.333448', 'step': 13810, 'epoch': 2} {'type': 'loss', 'content': 0.06986391544342041, 'timestamp': '2025-10-01 04:30:02.335771', 'step': 13811, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:30:02.367019', 'step': 13811, 'epoch': 2} {'type': 'loss', 'content': 0.07717031985521317, 'timestamp': '2025-10-01 04:30:02.391093', 'step': 13812, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:02.422503', 'step': 13812, 'epoch': 2} {'type': 'loss', 'content': 0.1098165437579155, 'timestamp': '2025-10-01 04:30:02.424693', 'step': 13813, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:30:02.459952', 'step': 13813, 'epoch': 2} {'type': 'loss', 'content': 0.1254664659500122, 'timestamp': '2025-10-01 04:30:02.462179', 'step': 13814, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:30:02.494037', 'step': 13814, 'epoch': 2} {'type': 'loss', 'content': 0.149379163980484, 'timestamp': '2025-10-01 04:30:02.496382', 'step': 13815, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:02.527283', 'step': 13815, 'epoch': 2} {'type': 'loss', 'content': 0.10408839583396912, 'timestamp': '2025-10-01 04:30:02.551281', 'step': 13816, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:30:02.586218', 'step': 13816, 'epoch': 2} {'type': 'loss', 'content': 0.0948801189661026, 'timestamp': '2025-10-01 04:30:02.588648', 'step': 13817, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:02.619294', 'step': 13817, 'epoch': 2} {'type': 'loss', 'content': 0.12573251128196716, 'timestamp': '2025-10-01 04:30:02.621451', 'step': 13818, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:30:02.652292', 'step': 13818, 'epoch': 2} {'type': 'loss', 'content': 0.08905139565467834, 'timestamp': '2025-10-01 04:30:02.654627', 'step': 13819, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:02.684996', 'step': 13819, 'epoch': 2} {'type': 'loss', 'content': 0.1984953135251999, 'timestamp': '2025-10-01 04:30:02.710178', 'step': 13820, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:30:02.744702', 'step': 13820, 'epoch': 2} {'type': 'loss', 'content': 0.07330556213855743, 'timestamp': '2025-10-01 04:30:02.746820', 'step': 13821, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:02.777118', 'step': 13821, 'epoch': 2} {'type': 'loss', 'content': 0.08806557208299637, 'timestamp': '2025-10-01 04:30:02.779871', 'step': 13822, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:02.810815', 'step': 13822, 'epoch': 2} {'type': 'loss', 'content': 0.10959067195653915, 'timestamp': '2025-10-01 04:30:02.813238', 'step': 13823, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:30:02.844124', 'step': 13823, 'epoch': 2} {'type': 'loss', 'content': 0.1491057425737381, 'timestamp': '2025-10-01 04:30:02.871274', 'step': 13824, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:02.904763', 'step': 13824, 'epoch': 2} {'type': 'loss', 'content': 0.1494600772857666, 'timestamp': '2025-10-01 04:30:02.907233', 'step': 13825, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:02.938567', 'step': 13825, 'epoch': 2} {'type': 'loss', 'content': 0.08605895191431046, 'timestamp': '2025-10-01 04:30:02.941526', 'step': 13826, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:02.984683', 'step': 13826, 'epoch': 2} {'type': 'loss', 'content': 0.10478004068136215, 'timestamp': '2025-10-01 04:30:02.987249', 'step': 13827, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:30:03.018482', 'step': 13827, 'epoch': 2} {'type': 'loss', 'content': 0.0969456359744072, 'timestamp': '2025-10-01 04:30:03.042242', 'step': 13828, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:30:03.074183', 'step': 13828, 'epoch': 2} {'type': 'loss', 'content': 0.10949218273162842, 'timestamp': '2025-10-01 04:30:03.076802', 'step': 13829, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:30:03.113358', 'step': 13829, 'epoch': 2} {'type': 'loss', 'content': 0.14609742164611816, 'timestamp': '2025-10-01 04:30:03.115470', 'step': 13830, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:03.146074', 'step': 13830, 'epoch': 2} {'type': 'loss', 'content': 0.08643247932195663, 'timestamp': '2025-10-01 04:30:03.148208', 'step': 13831, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:03.185205', 'step': 13831, 'epoch': 2} {'type': 'loss', 'content': 0.07373357564210892, 'timestamp': '2025-10-01 04:30:03.208872', 'step': 13832, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:03.241290', 'step': 13832, 'epoch': 2} {'type': 'loss', 'content': 0.11324431002140045, 'timestamp': '2025-10-01 04:30:03.244150', 'step': 13833, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:03.275542', 'step': 13833, 'epoch': 2} {'type': 'loss', 'content': 0.07279098778963089, 'timestamp': '2025-10-01 04:30:03.284564', 'step': 13834, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:03.314659', 'step': 13834, 'epoch': 2} {'type': 'loss', 'content': 0.07237641513347626, 'timestamp': '2025-10-01 04:30:03.316875', 'step': 13835, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:03.348654', 'step': 13835, 'epoch': 2} {'type': 'loss', 'content': 0.14877012372016907, 'timestamp': '2025-10-01 04:30:03.377683', 'step': 13836, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:30:03.411277', 'step': 13836, 'epoch': 2} {'type': 'loss', 'content': 0.08384472131729126, 'timestamp': '2025-10-01 04:30:03.413574', 'step': 13837, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:03.445299', 'step': 13837, 'epoch': 2} {'type': 'loss', 'content': 0.08655566722154617, 'timestamp': '2025-10-01 04:30:03.448056', 'step': 13838, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:30:03.482161', 'step': 13838, 'epoch': 2} {'type': 'loss', 'content': 0.12736287713050842, 'timestamp': '2025-10-01 04:30:03.484416', 'step': 13839, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:03.515931', 'step': 13839, 'epoch': 2} {'type': 'loss', 'content': 0.11117703467607498, 'timestamp': '2025-10-01 04:30:03.539670', 'step': 13840, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:03.584227', 'step': 13840, 'epoch': 2} {'type': 'loss', 'content': 0.0840856283903122, 'timestamp': '2025-10-01 04:30:03.586317', 'step': 13841, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:30:03.617529', 'step': 13841, 'epoch': 2} {'type': 'loss', 'content': 0.10420145839452744, 'timestamp': '2025-10-01 04:30:03.620129', 'step': 13842, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:03.650586', 'step': 13842, 'epoch': 2} {'type': 'loss', 'content': 0.0702156275510788, 'timestamp': '2025-10-01 04:30:03.654776', 'step': 13843, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:30:03.689986', 'step': 13843, 'epoch': 2} {'type': 'loss', 'content': 0.07286956906318665, 'timestamp': '2025-10-01 04:30:03.713835', 'step': 13844, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:03.746045', 'step': 13844, 'epoch': 2} {'type': 'loss', 'content': 0.10905849933624268, 'timestamp': '2025-10-01 04:30:03.748234', 'step': 13845, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:03.778932', 'step': 13845, 'epoch': 2} {'type': 'loss', 'content': 0.16480234265327454, 'timestamp': '2025-10-01 04:30:03.781173', 'step': 13846, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:30:03.812763', 'step': 13846, 'epoch': 2} {'type': 'loss', 'content': 0.15050747990608215, 'timestamp': '2025-10-01 04:30:03.816408', 'step': 13847, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:03.849447', 'step': 13847, 'epoch': 2} {'type': 'loss', 'content': 0.18871842324733734, 'timestamp': '2025-10-01 04:30:03.873114', 'step': 13848, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:03.904218', 'step': 13848, 'epoch': 2} {'type': 'loss', 'content': 0.10814788937568665, 'timestamp': '2025-10-01 04:30:03.908323', 'step': 13849, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:30:03.941505', 'step': 13849, 'epoch': 2} {'type': 'loss', 'content': 0.080352284014225, 'timestamp': '2025-10-01 04:30:03.943948', 'step': 13850, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:03.974344', 'step': 13850, 'epoch': 2} {'type': 'loss', 'content': 0.1254608929157257, 'timestamp': '2025-10-01 04:30:03.977003', 'step': 13851, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:04.007418', 'step': 13851, 'epoch': 2} {'type': 'loss', 'content': 0.18940123915672302, 'timestamp': '2025-10-01 04:30:04.031951', 'step': 13852, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:04.063295', 'step': 13852, 'epoch': 2} {'type': 'loss', 'content': 0.09104115515947342, 'timestamp': '2025-10-01 04:30:04.065648', 'step': 13853, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:04.096477', 'step': 13853, 'epoch': 2} {'type': 'loss', 'content': 0.16550210118293762, 'timestamp': '2025-10-01 04:30:04.098774', 'step': 13854, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:04.129221', 'step': 13854, 'epoch': 2} {'type': 'loss', 'content': 0.06204502657055855, 'timestamp': '2025-10-01 04:30:04.131616', 'step': 13855, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:30:04.162577', 'step': 13855, 'epoch': 2} {'type': 'loss', 'content': 0.1691811978816986, 'timestamp': '2025-10-01 04:30:04.186335', 'step': 13856, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:04.216552', 'step': 13856, 'epoch': 2} {'type': 'loss', 'content': 0.11658072471618652, 'timestamp': '2025-10-01 04:30:04.219441', 'step': 13857, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:04.249611', 'step': 13857, 'epoch': 2} {'type': 'loss', 'content': 0.12050510197877884, 'timestamp': '2025-10-01 04:30:04.252057', 'step': 13858, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:04.282973', 'step': 13858, 'epoch': 2} {'type': 'loss', 'content': 0.13726234436035156, 'timestamp': '2025-10-01 04:30:04.285050', 'step': 13859, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:04.316414', 'step': 13859, 'epoch': 2} {'type': 'loss', 'content': 0.08449716120958328, 'timestamp': '2025-10-01 04:30:04.340236', 'step': 13860, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:04.370816', 'step': 13860, 'epoch': 2} {'type': 'loss', 'content': 0.12587705254554749, 'timestamp': '2025-10-01 04:30:04.373063', 'step': 13861, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:04.404236', 'step': 13861, 'epoch': 2} {'type': 'loss', 'content': 0.14406278729438782, 'timestamp': '2025-10-01 04:30:04.406207', 'step': 13862, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:04.436682', 'step': 13862, 'epoch': 2} {'type': 'loss', 'content': 0.1419907659292221, 'timestamp': '2025-10-01 04:30:04.438895', 'step': 13863, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:30:04.468866', 'step': 13863, 'epoch': 2} {'type': 'loss', 'content': 0.09438909590244293, 'timestamp': '2025-10-01 04:30:04.492514', 'step': 13864, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:04.523000', 'step': 13864, 'epoch': 2} {'type': 'loss', 'content': 0.15081723034381866, 'timestamp': '2025-10-01 04:30:04.537045', 'step': 13865, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:04.568522', 'step': 13865, 'epoch': 2} {'type': 'loss', 'content': 0.05132834240794182, 'timestamp': '2025-10-01 04:30:04.570597', 'step': 13866, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:30:04.600596', 'step': 13866, 'epoch': 2} {'type': 'loss', 'content': 0.1106048971414566, 'timestamp': '2025-10-01 04:30:04.604381', 'step': 13867, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:30:04.634832', 'step': 13867, 'epoch': 2} {'type': 'loss', 'content': 0.07277502119541168, 'timestamp': '2025-10-01 04:30:04.658412', 'step': 13868, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:04.691096', 'step': 13868, 'epoch': 2} {'type': 'loss', 'content': 0.07480309903621674, 'timestamp': '2025-10-01 04:30:04.693582', 'step': 13869, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:04.723942', 'step': 13869, 'epoch': 2} {'type': 'loss', 'content': 0.10221150517463684, 'timestamp': '2025-10-01 04:30:04.726153', 'step': 13870, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:04.756449', 'step': 13870, 'epoch': 2} {'type': 'loss', 'content': 0.06514090299606323, 'timestamp': '2025-10-01 04:30:04.758578', 'step': 13871, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:04.788956', 'step': 13871, 'epoch': 2} {'type': 'loss', 'content': 0.08110994100570679, 'timestamp': '2025-10-01 04:30:04.823805', 'step': 13872, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:30:04.856283', 'step': 13872, 'epoch': 2} {'type': 'loss', 'content': 0.0713145062327385, 'timestamp': '2025-10-01 04:30:04.858712', 'step': 13873, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:30:04.889674', 'step': 13873, 'epoch': 2} {'type': 'loss', 'content': 0.20023420453071594, 'timestamp': '2025-10-01 04:30:04.892091', 'step': 13874, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:30:04.923551', 'step': 13874, 'epoch': 2} {'type': 'loss', 'content': 0.08719286322593689, 'timestamp': '2025-10-01 04:30:04.928218', 'step': 13875, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:04.964851', 'step': 13875, 'epoch': 2} {'type': 'loss', 'content': 0.08451038599014282, 'timestamp': '2025-10-01 04:30:04.988826', 'step': 13876, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:05.021676', 'step': 13876, 'epoch': 2} {'type': 'loss', 'content': 0.08263510465621948, 'timestamp': '2025-10-01 04:30:05.023975', 'step': 13877, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:05.056643', 'step': 13877, 'epoch': 2} {'type': 'loss', 'content': 0.050865691155195236, 'timestamp': '2025-10-01 04:30:05.058887', 'step': 13878, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:05.090891', 'step': 13878, 'epoch': 2} {'type': 'loss', 'content': 0.13741600513458252, 'timestamp': '2025-10-01 04:30:05.093601', 'step': 13879, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:05.126689', 'step': 13879, 'epoch': 2} {'type': 'loss', 'content': 0.09483742713928223, 'timestamp': '2025-10-01 04:30:05.150307', 'step': 13880, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:05.180896', 'step': 13880, 'epoch': 2} {'type': 'loss', 'content': 0.08248109370470047, 'timestamp': '2025-10-01 04:30:05.183163', 'step': 13881, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:05.213383', 'step': 13881, 'epoch': 2} {'type': 'loss', 'content': 0.1510872095823288, 'timestamp': '2025-10-01 04:30:05.217134', 'step': 13882, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:05.248739', 'step': 13882, 'epoch': 2} {'type': 'loss', 'content': 0.1140880212187767, 'timestamp': '2025-10-01 04:30:05.250865', 'step': 13883, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:30:05.281956', 'step': 13883, 'epoch': 2} {'type': 'loss', 'content': 0.10241968184709549, 'timestamp': '2025-10-01 04:30:05.305588', 'step': 13884, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:05.338444', 'step': 13884, 'epoch': 2} {'type': 'loss', 'content': 0.04931805655360222, 'timestamp': '2025-10-01 04:30:05.340664', 'step': 13885, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:05.372992', 'step': 13885, 'epoch': 2} {'type': 'loss', 'content': 0.06952866166830063, 'timestamp': '2025-10-01 04:30:05.375331', 'step': 13886, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:05.408900', 'step': 13886, 'epoch': 2} {'type': 'loss', 'content': 0.09581569582223892, 'timestamp': '2025-10-01 04:30:05.411179', 'step': 13887, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:30:05.444609', 'step': 13887, 'epoch': 2} {'type': 'loss', 'content': 0.1037915050983429, 'timestamp': '2025-10-01 04:30:05.468509', 'step': 13888, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:05.504811', 'step': 13888, 'epoch': 2} {'type': 'loss', 'content': 0.09056174755096436, 'timestamp': '2025-10-01 04:30:05.507335', 'step': 13889, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:30:05.539797', 'step': 13889, 'epoch': 2} {'type': 'loss', 'content': 0.11719085276126862, 'timestamp': '2025-10-01 04:30:05.542672', 'step': 13890, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:05.573976', 'step': 13890, 'epoch': 2} {'type': 'loss', 'content': 0.07872796058654785, 'timestamp': '2025-10-01 04:30:05.576062', 'step': 13891, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:30:05.607539', 'step': 13891, 'epoch': 2} {'type': 'loss', 'content': 0.1154407411813736, 'timestamp': '2025-10-01 04:30:05.632185', 'step': 13892, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:05.667583', 'step': 13892, 'epoch': 2} {'type': 'loss', 'content': 0.07371848821640015, 'timestamp': '2025-10-01 04:30:05.669751', 'step': 13893, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:30:05.702026', 'step': 13893, 'epoch': 2} {'type': 'loss', 'content': 0.04689738154411316, 'timestamp': '2025-10-01 04:30:05.704183', 'step': 13894, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:05.737485', 'step': 13894, 'epoch': 2} {'type': 'loss', 'content': 0.13124875724315643, 'timestamp': '2025-10-01 04:30:05.739804', 'step': 13895, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:05.772216', 'step': 13895, 'epoch': 2} {'type': 'loss', 'content': 0.13952703773975372, 'timestamp': '2025-10-01 04:30:05.795802', 'step': 13896, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:05.830344', 'step': 13896, 'epoch': 2} {'type': 'loss', 'content': 0.12324608862400055, 'timestamp': '2025-10-01 04:30:05.832625', 'step': 13897, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:05.864450', 'step': 13897, 'epoch': 2} {'type': 'loss', 'content': 0.06801003217697144, 'timestamp': '2025-10-01 04:30:05.866751', 'step': 13898, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:05.901452', 'step': 13898, 'epoch': 2} {'type': 'loss', 'content': 0.10615095496177673, 'timestamp': '2025-10-01 04:30:05.906240', 'step': 13899, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:05.936995', 'step': 13899, 'epoch': 2} {'type': 'loss', 'content': 0.04476410523056984, 'timestamp': '2025-10-01 04:30:05.961018', 'step': 13900, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:05.997107', 'step': 13900, 'epoch': 2} {'type': 'loss', 'content': 0.11364147067070007, 'timestamp': '2025-10-01 04:30:05.999448', 'step': 13901, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:06.031657', 'step': 13901, 'epoch': 2} {'type': 'loss', 'content': 0.102266825735569, 'timestamp': '2025-10-01 04:30:06.033755', 'step': 13902, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:06.064551', 'step': 13902, 'epoch': 2} {'type': 'loss', 'content': 0.057484082877635956, 'timestamp': '2025-10-01 04:30:06.066628', 'step': 13903, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:06.099252', 'step': 13903, 'epoch': 2} {'type': 'loss', 'content': 0.09833502024412155, 'timestamp': '2025-10-01 04:30:06.122774', 'step': 13904, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:30:06.155129', 'step': 13904, 'epoch': 2} {'type': 'loss', 'content': 0.11699090152978897, 'timestamp': '2025-10-01 04:30:06.157253', 'step': 13905, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:06.189164', 'step': 13905, 'epoch': 2} {'type': 'loss', 'content': 0.14314649999141693, 'timestamp': '2025-10-01 04:30:06.192225', 'step': 13906, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:30:06.225256', 'step': 13906, 'epoch': 2} {'type': 'loss', 'content': 0.18184667825698853, 'timestamp': '2025-10-01 04:30:06.227837', 'step': 13907, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:06.260930', 'step': 13907, 'epoch': 2} {'type': 'loss', 'content': 0.12630513310432434, 'timestamp': '2025-10-01 04:30:06.286889', 'step': 13908, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:06.318075', 'step': 13908, 'epoch': 2} {'type': 'loss', 'content': 0.07815681397914886, 'timestamp': '2025-10-01 04:30:06.322781', 'step': 13909, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:30:06.354372', 'step': 13909, 'epoch': 2} {'type': 'loss', 'content': 0.06232466176152229, 'timestamp': '2025-10-01 04:30:06.357140', 'step': 13910, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:06.389668', 'step': 13910, 'epoch': 2} {'type': 'loss', 'content': 0.045187149196863174, 'timestamp': '2025-10-01 04:30:06.399163', 'step': 13911, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:06.444765', 'step': 13911, 'epoch': 2} {'type': 'loss', 'content': 0.07793426513671875, 'timestamp': '2025-10-01 04:30:06.468438', 'step': 13912, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:06.501159', 'step': 13912, 'epoch': 2} {'type': 'loss', 'content': 0.08433370292186737, 'timestamp': '2025-10-01 04:30:06.505346', 'step': 13913, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:06.536097', 'step': 13913, 'epoch': 2} {'type': 'loss', 'content': 0.10716211795806885, 'timestamp': '2025-10-01 04:30:06.538191', 'step': 13914, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:30:06.571481', 'step': 13914, 'epoch': 2} {'type': 'loss', 'content': 0.08956795185804367, 'timestamp': '2025-10-01 04:30:06.574309', 'step': 13915, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:30:06.606738', 'step': 13915, 'epoch': 2} {'type': 'loss', 'content': 0.10483891516923904, 'timestamp': '2025-10-01 04:30:06.630857', 'step': 13916, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:06.665948', 'step': 13916, 'epoch': 2} {'type': 'loss', 'content': 0.08909684419631958, 'timestamp': '2025-10-01 04:30:06.668084', 'step': 13917, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:30:06.700555', 'step': 13917, 'epoch': 2} {'type': 'loss', 'content': 0.040908657014369965, 'timestamp': '2025-10-01 04:30:06.711315', 'step': 13918, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:06.746304', 'step': 13918, 'epoch': 2} {'type': 'loss', 'content': 0.15981610119342804, 'timestamp': '2025-10-01 04:30:06.748649', 'step': 13919, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:30:06.779470', 'step': 13919, 'epoch': 2} {'type': 'loss', 'content': 0.06156408414244652, 'timestamp': '2025-10-01 04:30:06.803251', 'step': 13920, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:06.834719', 'step': 13920, 'epoch': 2} {'type': 'loss', 'content': 0.08005549758672714, 'timestamp': '2025-10-01 04:30:06.837050', 'step': 13921, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:30:06.867722', 'step': 13921, 'epoch': 2} {'type': 'loss', 'content': 0.06336579471826553, 'timestamp': '2025-10-01 04:30:06.870309', 'step': 13922, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:30:06.901294', 'step': 13922, 'epoch': 2} {'type': 'loss', 'content': 0.1341761201620102, 'timestamp': '2025-10-01 04:30:06.904013', 'step': 13923, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:30:06.935577', 'step': 13923, 'epoch': 2} {'type': 'loss', 'content': 0.1411994993686676, 'timestamp': '2025-10-01 04:30:06.959256', 'step': 13924, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:06.990337', 'step': 13924, 'epoch': 2} {'type': 'loss', 'content': 0.11563055962324142, 'timestamp': '2025-10-01 04:30:06.993054', 'step': 13925, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:07.032388', 'step': 13925, 'epoch': 2} {'type': 'loss', 'content': 0.03282035142183304, 'timestamp': '2025-10-01 04:30:07.034862', 'step': 13926, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:07.065397', 'step': 13926, 'epoch': 2} {'type': 'loss', 'content': 0.06291002780199051, 'timestamp': '2025-10-01 04:30:07.068730', 'step': 13927, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:07.099905', 'step': 13927, 'epoch': 2} {'type': 'loss', 'content': 0.13499966263771057, 'timestamp': '2025-10-01 04:30:07.123681', 'step': 13928, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:07.154919', 'step': 13928, 'epoch': 2} {'type': 'loss', 'content': 0.06508753448724747, 'timestamp': '2025-10-01 04:30:07.158610', 'step': 13929, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:07.188872', 'step': 13929, 'epoch': 2} {'type': 'loss', 'content': 0.040416281670331955, 'timestamp': '2025-10-01 04:30:07.191564', 'step': 13930, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:07.222173', 'step': 13930, 'epoch': 2} {'type': 'loss', 'content': 0.10497412085533142, 'timestamp': '2025-10-01 04:30:07.224390', 'step': 13931, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:07.255448', 'step': 13931, 'epoch': 2} {'type': 'loss', 'content': 0.05936906859278679, 'timestamp': '2025-10-01 04:30:07.279327', 'step': 13932, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:07.313569', 'step': 13932, 'epoch': 2} {'type': 'loss', 'content': 0.08507291972637177, 'timestamp': '2025-10-01 04:30:07.316164', 'step': 13933, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:07.347834', 'step': 13933, 'epoch': 2} {'type': 'loss', 'content': 0.07759718596935272, 'timestamp': '2025-10-01 04:30:07.350452', 'step': 13934, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:30:07.380854', 'step': 13934, 'epoch': 2} {'type': 'loss', 'content': 0.11290966719388962, 'timestamp': '2025-10-01 04:30:07.383505', 'step': 13935, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:07.414401', 'step': 13935, 'epoch': 2} {'type': 'loss', 'content': 0.048859477043151855, 'timestamp': '2025-10-01 04:30:07.438334', 'step': 13936, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:07.469997', 'step': 13936, 'epoch': 2} {'type': 'loss', 'content': 0.08442959934473038, 'timestamp': '2025-10-01 04:30:07.472843', 'step': 13937, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:07.504137', 'step': 13937, 'epoch': 2} {'type': 'loss', 'content': 0.07970989495515823, 'timestamp': '2025-10-01 04:30:07.506694', 'step': 13938, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:07.537678', 'step': 13938, 'epoch': 2} {'type': 'loss', 'content': 0.10282732546329498, 'timestamp': '2025-10-01 04:30:07.540757', 'step': 13939, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:07.573046', 'step': 13939, 'epoch': 2} {'type': 'loss', 'content': 0.11528193205595016, 'timestamp': '2025-10-01 04:30:07.613416', 'step': 13940, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:07.645537', 'step': 13940, 'epoch': 2} {'type': 'loss', 'content': 0.11697560548782349, 'timestamp': '2025-10-01 04:30:07.648156', 'step': 13941, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:07.680155', 'step': 13941, 'epoch': 2} {'type': 'loss', 'content': 0.10474919527769089, 'timestamp': '2025-10-01 04:30:07.683230', 'step': 13942, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:30:07.715051', 'step': 13942, 'epoch': 2} {'type': 'loss', 'content': 0.10878659039735794, 'timestamp': '2025-10-01 04:30:07.717340', 'step': 13943, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:07.750819', 'step': 13943, 'epoch': 2} {'type': 'loss', 'content': 0.09423431754112244, 'timestamp': '2025-10-01 04:30:07.774932', 'step': 13944, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:07.805666', 'step': 13944, 'epoch': 2} {'type': 'loss', 'content': 0.12236659973859787, 'timestamp': '2025-10-01 04:30:07.808999', 'step': 13945, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:30:07.840971', 'step': 13945, 'epoch': 2} {'type': 'loss', 'content': 0.08643049746751785, 'timestamp': '2025-10-01 04:30:07.843654', 'step': 13946, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:07.876528', 'step': 13946, 'epoch': 2} {'type': 'loss', 'content': 0.11187226325273514, 'timestamp': '2025-10-01 04:30:07.879025', 'step': 13947, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:30:07.909287', 'step': 13947, 'epoch': 2} {'type': 'loss', 'content': 0.0979953184723854, 'timestamp': '2025-10-01 04:30:07.932942', 'step': 13948, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:30:07.963749', 'step': 13948, 'epoch': 2} {'type': 'loss', 'content': 0.10158286243677139, 'timestamp': '2025-10-01 04:30:07.966418', 'step': 13949, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:07.999178', 'step': 13949, 'epoch': 2} {'type': 'loss', 'content': 0.09174589067697525, 'timestamp': '2025-10-01 04:30:08.001646', 'step': 13950, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:08.032452', 'step': 13950, 'epoch': 2} {'type': 'loss', 'content': 0.08352036774158478, 'timestamp': '2025-10-01 04:30:08.035216', 'step': 13951, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:30:08.066275', 'step': 13951, 'epoch': 2} {'type': 'loss', 'content': 0.10520538687705994, 'timestamp': '2025-10-01 04:30:08.091101', 'step': 13952, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:08.122060', 'step': 13952, 'epoch': 2} {'type': 'loss', 'content': 0.14940382540225983, 'timestamp': '2025-10-01 04:30:08.124685', 'step': 13953, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:08.156102', 'step': 13953, 'epoch': 2} {'type': 'loss', 'content': 0.08072517067193985, 'timestamp': '2025-10-01 04:30:08.158540', 'step': 13954, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:30:08.189625', 'step': 13954, 'epoch': 2} {'type': 'loss', 'content': 0.12220136821269989, 'timestamp': '2025-10-01 04:30:08.191732', 'step': 13955, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:08.222486', 'step': 13955, 'epoch': 2} {'type': 'loss', 'content': 0.12902399897575378, 'timestamp': '2025-10-01 04:30:08.246236', 'step': 13956, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:08.277598', 'step': 13956, 'epoch': 2} {'type': 'loss', 'content': 0.098054438829422, 'timestamp': '2025-10-01 04:30:08.279770', 'step': 13957, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:30:08.310446', 'step': 13957, 'epoch': 2} {'type': 'loss', 'content': 0.16191807389259338, 'timestamp': '2025-10-01 04:30:08.313134', 'step': 13958, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:08.345207', 'step': 13958, 'epoch': 2} {'type': 'loss', 'content': 0.133340984582901, 'timestamp': '2025-10-01 04:30:08.348008', 'step': 13959, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:30:08.378403', 'step': 13959, 'epoch': 2} {'type': 'loss', 'content': 0.1676463633775711, 'timestamp': '2025-10-01 04:30:08.402085', 'step': 13960, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:08.432885', 'step': 13960, 'epoch': 2} {'type': 'loss', 'content': 0.11609906703233719, 'timestamp': '2025-10-01 04:30:08.449048', 'step': 13961, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:30:08.480264', 'step': 13961, 'epoch': 2} {'type': 'loss', 'content': 0.1293019950389862, 'timestamp': '2025-10-01 04:30:08.482693', 'step': 13962, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:08.512985', 'step': 13962, 'epoch': 2} {'type': 'loss', 'content': 0.11164140701293945, 'timestamp': '2025-10-01 04:30:08.515626', 'step': 13963, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:30:08.559638', 'step': 13963, 'epoch': 2} {'type': 'loss', 'content': 0.1469191312789917, 'timestamp': '2025-10-01 04:30:08.583260', 'step': 13964, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:08.613850', 'step': 13964, 'epoch': 2} {'type': 'loss', 'content': 0.06800703704357147, 'timestamp': '2025-10-01 04:30:08.616396', 'step': 13965, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:08.647787', 'step': 13965, 'epoch': 2} {'type': 'loss', 'content': 0.06437114626169205, 'timestamp': '2025-10-01 04:30:08.655639', 'step': 13966, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:08.686409', 'step': 13966, 'epoch': 2} {'type': 'loss', 'content': 0.10769949853420258, 'timestamp': '2025-10-01 04:30:08.689499', 'step': 13967, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:08.720658', 'step': 13967, 'epoch': 2} {'type': 'loss', 'content': 0.12314558029174805, 'timestamp': '2025-10-01 04:30:08.744631', 'step': 13968, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:08.775947', 'step': 13968, 'epoch': 2} {'type': 'loss', 'content': 0.11865770071744919, 'timestamp': '2025-10-01 04:30:08.779052', 'step': 13969, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:08.808897', 'step': 13969, 'epoch': 2} {'type': 'loss', 'content': 0.0637916848063469, 'timestamp': '2025-10-01 04:30:08.811035', 'step': 13970, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:08.841826', 'step': 13970, 'epoch': 2} {'type': 'loss', 'content': 0.09325183928012848, 'timestamp': '2025-10-01 04:30:08.844068', 'step': 13971, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:08.874622', 'step': 13971, 'epoch': 2} {'type': 'loss', 'content': 0.14640748500823975, 'timestamp': '2025-10-01 04:30:08.898616', 'step': 13972, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:30:08.929361', 'step': 13972, 'epoch': 2} {'type': 'loss', 'content': 0.0669984519481659, 'timestamp': '2025-10-01 04:30:08.931613', 'step': 13973, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:08.968682', 'step': 13973, 'epoch': 2} {'type': 'loss', 'content': 0.1270419806241989, 'timestamp': '2025-10-01 04:30:08.971405', 'step': 13974, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:30:09.001617', 'step': 13974, 'epoch': 2} {'type': 'loss', 'content': 0.13176105916500092, 'timestamp': '2025-10-01 04:30:09.006297', 'step': 13975, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:09.037293', 'step': 13975, 'epoch': 2} {'type': 'loss', 'content': 0.17280931770801544, 'timestamp': '2025-10-01 04:30:09.060974', 'step': 13976, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:09.090925', 'step': 13976, 'epoch': 2} {'type': 'loss', 'content': 0.0909125879406929, 'timestamp': '2025-10-01 04:30:09.093453', 'step': 13977, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:09.125030', 'step': 13977, 'epoch': 2} {'type': 'loss', 'content': 0.11444493383169174, 'timestamp': '2025-10-01 04:30:09.127222', 'step': 13978, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:09.157626', 'step': 13978, 'epoch': 2} {'type': 'loss', 'content': 0.14998933672904968, 'timestamp': '2025-10-01 04:30:09.159904', 'step': 13979, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:09.190890', 'step': 13979, 'epoch': 2} {'type': 'loss', 'content': 0.10337403416633606, 'timestamp': '2025-10-01 04:30:09.214512', 'step': 13980, 'epoch': 2} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 949202279808}], 'timestamp': '2025-10-01 04:30:17.447762', 'step': 13980, 'epoch': 2} {'type': 'pplx', 'content': 9606.802126457322, 'timestamp': '2025-10-01 04:30:17.450508', 'step': 13980, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:17.480070', 'step': 13980, 'epoch': 2} {'type': 'loss', 'content': 0.05385957658290863, 'timestamp': '2025-10-01 04:30:17.482592', 'step': 13981, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:30:17.513190', 'step': 13981, 'epoch': 2} {'type': 'loss', 'content': 0.10253845155239105, 'timestamp': '2025-10-01 04:30:17.519557', 'step': 13982, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:17.551057', 'step': 13982, 'epoch': 2} {'type': 'loss', 'content': 0.1037522554397583, 'timestamp': '2025-10-01 04:30:17.553298', 'step': 13983, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:17.584095', 'step': 13983, 'epoch': 2} {'type': 'loss', 'content': 0.11720578372478485, 'timestamp': '2025-10-01 04:30:17.607873', 'step': 13984, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:17.641238', 'step': 13984, 'epoch': 2} {'type': 'loss', 'content': 0.09981430321931839, 'timestamp': '2025-10-01 04:30:17.643516', 'step': 13985, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:17.675352', 'step': 13985, 'epoch': 2} {'type': 'loss', 'content': 0.047584909945726395, 'timestamp': '2025-10-01 04:30:17.677755', 'step': 13986, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:30:17.709843', 'step': 13986, 'epoch': 2} {'type': 'loss', 'content': 0.16067340970039368, 'timestamp': '2025-10-01 04:30:17.712006', 'step': 13987, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:17.743934', 'step': 13987, 'epoch': 2} {'type': 'loss', 'content': 0.0778818354010582, 'timestamp': '2025-10-01 04:30:17.767806', 'step': 13988, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:17.799301', 'step': 13988, 'epoch': 2} {'type': 'loss', 'content': 0.09903821349143982, 'timestamp': '2025-10-01 04:30:17.801480', 'step': 13989, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:17.833400', 'step': 13989, 'epoch': 2} {'type': 'loss', 'content': 0.18573874235153198, 'timestamp': '2025-10-01 04:30:17.836140', 'step': 13990, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:30:17.869363', 'step': 13990, 'epoch': 2} {'type': 'loss', 'content': 0.1275777518749237, 'timestamp': '2025-10-01 04:30:17.872304', 'step': 13991, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:17.903837', 'step': 13991, 'epoch': 2} {'type': 'loss', 'content': 0.08585377037525177, 'timestamp': '2025-10-01 04:30:17.931051', 'step': 13992, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:30:17.963539', 'step': 13992, 'epoch': 2} {'type': 'loss', 'content': 0.0704430341720581, 'timestamp': '2025-10-01 04:30:17.965763', 'step': 13993, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:17.998196', 'step': 13993, 'epoch': 2} {'type': 'loss', 'content': 0.10215018689632416, 'timestamp': '2025-10-01 04:30:18.000397', 'step': 13994, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:18.040565', 'step': 13994, 'epoch': 2} {'type': 'loss', 'content': 0.029644308611750603, 'timestamp': '2025-10-01 04:30:18.044703', 'step': 13995, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:18.076368', 'step': 13995, 'epoch': 2} {'type': 'loss', 'content': 0.07852614670991898, 'timestamp': '2025-10-01 04:30:18.100253', 'step': 13996, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:18.138430', 'step': 13996, 'epoch': 2} {'type': 'loss', 'content': 0.01743372157216072, 'timestamp': '2025-10-01 04:30:18.140708', 'step': 13997, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:18.171352', 'step': 13997, 'epoch': 2} {'type': 'loss', 'content': 0.0910632386803627, 'timestamp': '2025-10-01 04:30:18.173543', 'step': 13998, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:18.204012', 'step': 13998, 'epoch': 2} {'type': 'loss', 'content': 0.1394437700510025, 'timestamp': '2025-10-01 04:30:18.206166', 'step': 13999, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:18.239612', 'step': 13999, 'epoch': 2} {'type': 'loss', 'content': 0.1082405298948288, 'timestamp': '2025-10-01 04:30:18.265231', 'step': 14000, 'epoch': 2} {'type': 'info', 'content': 'Checkpoint saved at step 14000', 'timestamp': '2025-10-01 04:30:23.378682', 'step': 14000, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:23.431599', 'step': 14000, 'epoch': 2} {'type': 'loss', 'content': 0.07857011258602142, 'timestamp': '2025-10-01 04:30:23.434104', 'step': 14001, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:30:23.478028', 'step': 14001, 'epoch': 2} {'type': 'loss', 'content': 0.08758366852998734, 'timestamp': '2025-10-01 04:30:23.480789', 'step': 14002, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:30:23.521929', 'step': 14002, 'epoch': 2} {'type': 'loss', 'content': 0.11560029536485672, 'timestamp': '2025-10-01 04:30:23.524213', 'step': 14003, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:23.556010', 'step': 14003, 'epoch': 2} {'type': 'loss', 'content': 0.09069991856813431, 'timestamp': '2025-10-01 04:30:23.580612', 'step': 14004, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:23.612142', 'step': 14004, 'epoch': 2} {'type': 'loss', 'content': 0.06272364407777786, 'timestamp': '2025-10-01 04:30:23.614524', 'step': 14005, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:30:23.645156', 'step': 14005, 'epoch': 2} {'type': 'loss', 'content': 0.06490199267864227, 'timestamp': '2025-10-01 04:30:23.647476', 'step': 14006, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:23.679945', 'step': 14006, 'epoch': 2} {'type': 'loss', 'content': 0.056484006345272064, 'timestamp': '2025-10-01 04:30:23.682244', 'step': 14007, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:23.717355', 'step': 14007, 'epoch': 2} {'type': 'loss', 'content': 0.12479880452156067, 'timestamp': '2025-10-01 04:30:23.741448', 'step': 14008, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:23.773684', 'step': 14008, 'epoch': 2} {'type': 'loss', 'content': 0.08643216639757156, 'timestamp': '2025-10-01 04:30:23.775979', 'step': 14009, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:23.808348', 'step': 14009, 'epoch': 2} {'type': 'loss', 'content': 0.0944543108344078, 'timestamp': '2025-10-01 04:30:23.811043', 'step': 14010, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:30:23.842305', 'step': 14010, 'epoch': 2} {'type': 'loss', 'content': 0.14314401149749756, 'timestamp': '2025-10-01 04:30:23.850908', 'step': 14011, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:30:23.882206', 'step': 14011, 'epoch': 2} {'type': 'loss', 'content': 0.11323116719722748, 'timestamp': '2025-10-01 04:30:23.908193', 'step': 14012, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:30:23.939886', 'step': 14012, 'epoch': 2} {'type': 'loss', 'content': 0.11664506793022156, 'timestamp': '2025-10-01 04:30:23.941948', 'step': 14013, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:30:23.972010', 'step': 14013, 'epoch': 2} {'type': 'loss', 'content': 0.0188339464366436, 'timestamp': '2025-10-01 04:30:23.974192', 'step': 14014, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:24.005568', 'step': 14014, 'epoch': 2} {'type': 'loss', 'content': 0.11196887493133545, 'timestamp': '2025-10-01 04:30:24.012312', 'step': 14015, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:30:24.044026', 'step': 14015, 'epoch': 2} {'type': 'loss', 'content': 0.19385366141796112, 'timestamp': '2025-10-01 04:30:24.070664', 'step': 14016, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:30:24.104204', 'step': 14016, 'epoch': 2} {'type': 'loss', 'content': 0.0844259262084961, 'timestamp': '2025-10-01 04:30:24.106137', 'step': 14017, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:30:24.138139', 'step': 14017, 'epoch': 2} {'type': 'loss', 'content': 0.08028338849544525, 'timestamp': '2025-10-01 04:30:24.142746', 'step': 14018, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:30:24.177171', 'step': 14018, 'epoch': 2} {'type': 'loss', 'content': 0.09922736883163452, 'timestamp': '2025-10-01 04:30:24.179951', 'step': 14019, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:24.220992', 'step': 14019, 'epoch': 2} {'type': 'loss', 'content': 0.0912112146615982, 'timestamp': '2025-10-01 04:30:24.244994', 'step': 14020, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:24.280871', 'step': 14020, 'epoch': 2} {'type': 'loss', 'content': 0.09323268383741379, 'timestamp': '2025-10-01 04:30:24.283093', 'step': 14021, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:24.314434', 'step': 14021, 'epoch': 2} {'type': 'loss', 'content': 0.13043564558029175, 'timestamp': '2025-10-01 04:30:24.316668', 'step': 14022, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:24.348140', 'step': 14022, 'epoch': 2} {'type': 'loss', 'content': 0.11890953034162521, 'timestamp': '2025-10-01 04:30:24.350794', 'step': 14023, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:30:24.381103', 'step': 14023, 'epoch': 2} {'type': 'loss', 'content': 0.12113579362630844, 'timestamp': '2025-10-01 04:30:24.405258', 'step': 14024, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:30:24.435754', 'step': 14024, 'epoch': 2} {'type': 'loss', 'content': 0.10049307346343994, 'timestamp': '2025-10-01 04:30:24.438030', 'step': 14025, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:24.468686', 'step': 14025, 'epoch': 2} {'type': 'loss', 'content': 0.12512437999248505, 'timestamp': '2025-10-01 04:30:24.470849', 'step': 14026, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:24.502684', 'step': 14026, 'epoch': 2} {'type': 'loss', 'content': 0.09369835257530212, 'timestamp': '2025-10-01 04:30:24.504450', 'step': 14027, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:24.535835', 'step': 14027, 'epoch': 2} {'type': 'loss', 'content': 0.08895716071128845, 'timestamp': '2025-10-01 04:30:24.559439', 'step': 14028, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:24.589769', 'step': 14028, 'epoch': 2} {'type': 'loss', 'content': 0.10802566260099411, 'timestamp': '2025-10-01 04:30:24.592653', 'step': 14029, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:30:24.626706', 'step': 14029, 'epoch': 2} {'type': 'loss', 'content': 0.16123098134994507, 'timestamp': '2025-10-01 04:30:24.629454', 'step': 14030, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:30:24.660172', 'step': 14030, 'epoch': 2} {'type': 'loss', 'content': 0.12449602782726288, 'timestamp': '2025-10-01 04:30:24.662623', 'step': 14031, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:30:24.692842', 'step': 14031, 'epoch': 2} {'type': 'loss', 'content': 0.09703020006418228, 'timestamp': '2025-10-01 04:30:24.716398', 'step': 14032, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:24.746862', 'step': 14032, 'epoch': 2} {'type': 'loss', 'content': 0.06295154243707657, 'timestamp': '2025-10-01 04:30:24.748883', 'step': 14033, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:30:24.779188', 'step': 14033, 'epoch': 2} {'type': 'loss', 'content': 0.13614314794540405, 'timestamp': '2025-10-01 04:30:24.781645', 'step': 14034, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:24.811684', 'step': 14034, 'epoch': 2} {'type': 'loss', 'content': 0.15274223685264587, 'timestamp': '2025-10-01 04:30:24.813737', 'step': 14035, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:30:24.844351', 'step': 14035, 'epoch': 2} {'type': 'loss', 'content': 0.09110885858535767, 'timestamp': '2025-10-01 04:30:24.867681', 'step': 14036, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:30:24.899089', 'step': 14036, 'epoch': 2} {'type': 'loss', 'content': 0.07546333968639374, 'timestamp': '2025-10-01 04:30:24.901417', 'step': 14037, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:24.931926', 'step': 14037, 'epoch': 2} {'type': 'loss', 'content': 0.09004778414964676, 'timestamp': '2025-10-01 04:30:24.934047', 'step': 14038, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:24.972209', 'step': 14038, 'epoch': 2} {'type': 'loss', 'content': 0.07897749543190002, 'timestamp': '2025-10-01 04:30:24.974064', 'step': 14039, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:25.004063', 'step': 14039, 'epoch': 2} {'type': 'loss', 'content': 0.1433885544538498, 'timestamp': '2025-10-01 04:30:25.027919', 'step': 14040, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:25.058334', 'step': 14040, 'epoch': 2} {'type': 'loss', 'content': 0.15166451036930084, 'timestamp': '2025-10-01 04:30:25.060232', 'step': 14041, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:30:25.091198', 'step': 14041, 'epoch': 2} {'type': 'loss', 'content': 0.07250003516674042, 'timestamp': '2025-10-01 04:30:25.093242', 'step': 14042, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:25.124834', 'step': 14042, 'epoch': 2} {'type': 'loss', 'content': 0.07917360216379166, 'timestamp': '2025-10-01 04:30:25.126917', 'step': 14043, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:30:25.163917', 'step': 14043, 'epoch': 2} {'type': 'loss', 'content': 0.12934346497058868, 'timestamp': '2025-10-01 04:30:25.188171', 'step': 14044, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:30:25.220113', 'step': 14044, 'epoch': 2} {'type': 'loss', 'content': 0.16787602007389069, 'timestamp': '2025-10-01 04:30:25.222730', 'step': 14045, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:25.254054', 'step': 14045, 'epoch': 2} {'type': 'loss', 'content': 0.042824309319257736, 'timestamp': '2025-10-01 04:30:25.256249', 'step': 14046, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:30:25.287509', 'step': 14046, 'epoch': 2} {'type': 'loss', 'content': 0.09806536138057709, 'timestamp': '2025-10-01 04:30:25.291182', 'step': 14047, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:25.328279', 'step': 14047, 'epoch': 2} {'type': 'loss', 'content': 0.14580488204956055, 'timestamp': '2025-10-01 04:30:25.351689', 'step': 14048, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:25.382262', 'step': 14048, 'epoch': 2} {'type': 'loss', 'content': 0.11490471661090851, 'timestamp': '2025-10-01 04:30:25.384305', 'step': 14049, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:30:25.415433', 'step': 14049, 'epoch': 2} {'type': 'loss', 'content': 0.10214584320783615, 'timestamp': '2025-10-01 04:30:25.417756', 'step': 14050, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:30:25.448992', 'step': 14050, 'epoch': 2} {'type': 'loss', 'content': 0.07192620635032654, 'timestamp': '2025-10-01 04:30:25.451116', 'step': 14051, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:30:25.482072', 'step': 14051, 'epoch': 2} {'type': 'loss', 'content': 0.18627464771270752, 'timestamp': '2025-10-01 04:30:25.505636', 'step': 14052, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:25.538031', 'step': 14052, 'epoch': 2} {'type': 'loss', 'content': 0.0753602534532547, 'timestamp': '2025-10-01 04:30:25.540202', 'step': 14053, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:25.570141', 'step': 14053, 'epoch': 2} {'type': 'loss', 'content': 0.09675852954387665, 'timestamp': '2025-10-01 04:30:25.572249', 'step': 14054, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:30:25.603705', 'step': 14054, 'epoch': 2} {'type': 'loss', 'content': 0.11285562813282013, 'timestamp': '2025-10-01 04:30:25.605626', 'step': 14055, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:30:25.638355', 'step': 14055, 'epoch': 2} {'type': 'loss', 'content': 0.08585283905267715, 'timestamp': '2025-10-01 04:30:25.661953', 'step': 14056, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:25.692202', 'step': 14056, 'epoch': 2} {'type': 'loss', 'content': 0.10994042456150055, 'timestamp': '2025-10-01 04:30:25.694170', 'step': 14057, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:30:25.724024', 'step': 14057, 'epoch': 2} {'type': 'loss', 'content': 0.029635699465870857, 'timestamp': '2025-10-01 04:30:25.725964', 'step': 14058, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:30:25.757129', 'step': 14058, 'epoch': 2} {'type': 'loss', 'content': 0.05551602318882942, 'timestamp': '2025-10-01 04:30:25.759180', 'step': 14059, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:25.788912', 'step': 14059, 'epoch': 2} {'type': 'loss', 'content': 0.059901971369981766, 'timestamp': '2025-10-01 04:30:25.813091', 'step': 14060, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:30:25.844499', 'step': 14060, 'epoch': 2} {'type': 'loss', 'content': 0.062256909906864166, 'timestamp': '2025-10-01 04:30:25.846430', 'step': 14061, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:25.876476', 'step': 14061, 'epoch': 2} {'type': 'loss', 'content': 0.07614190131425858, 'timestamp': '2025-10-01 04:30:25.879103', 'step': 14062, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:30:25.909352', 'step': 14062, 'epoch': 2} {'type': 'loss', 'content': 0.11250107735395432, 'timestamp': '2025-10-01 04:30:25.911658', 'step': 14063, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:25.943600', 'step': 14063, 'epoch': 2} {'type': 'loss', 'content': 0.16166143119335175, 'timestamp': '2025-10-01 04:30:25.972129', 'step': 14064, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:30:26.002099', 'step': 14064, 'epoch': 2} {'type': 'loss', 'content': 0.04803793877363205, 'timestamp': '2025-10-01 04:30:26.004265', 'step': 14065, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:26.035627', 'step': 14065, 'epoch': 2} {'type': 'loss', 'content': 0.11619771271944046, 'timestamp': '2025-10-01 04:30:26.038028', 'step': 14066, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:30:26.075660', 'step': 14066, 'epoch': 2} {'type': 'loss', 'content': 0.06668747216463089, 'timestamp': '2025-10-01 04:30:26.078493', 'step': 14067, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:26.109450', 'step': 14067, 'epoch': 2} {'type': 'loss', 'content': 0.08602748066186905, 'timestamp': '2025-10-01 04:30:26.134264', 'step': 14068, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:26.166037', 'step': 14068, 'epoch': 2} {'type': 'loss', 'content': 0.12491454929113388, 'timestamp': '2025-10-01 04:30:26.168030', 'step': 14069, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:26.198805', 'step': 14069, 'epoch': 2} {'type': 'loss', 'content': 0.1535068303346634, 'timestamp': '2025-10-01 04:30:26.200958', 'step': 14070, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:26.231475', 'step': 14070, 'epoch': 2} {'type': 'loss', 'content': 0.121296726167202, 'timestamp': '2025-10-01 04:30:26.240369', 'step': 14071, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:26.270795', 'step': 14071, 'epoch': 2} {'type': 'loss', 'content': 0.0988946482539177, 'timestamp': '2025-10-01 04:30:26.294398', 'step': 14072, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:26.324613', 'step': 14072, 'epoch': 2} {'type': 'loss', 'content': 0.09078823775053024, 'timestamp': '2025-10-01 04:30:26.326920', 'step': 14073, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:26.357694', 'step': 14073, 'epoch': 2} {'type': 'loss', 'content': 0.0987086072564125, 'timestamp': '2025-10-01 04:30:26.359859', 'step': 14074, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:26.390963', 'step': 14074, 'epoch': 2} {'type': 'loss', 'content': 0.06955181062221527, 'timestamp': '2025-10-01 04:30:26.393093', 'step': 14075, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:26.426268', 'step': 14075, 'epoch': 2} {'type': 'loss', 'content': 0.13518236577510834, 'timestamp': '2025-10-01 04:30:26.449885', 'step': 14076, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:30:26.480955', 'step': 14076, 'epoch': 2} {'type': 'loss', 'content': 0.06334932893514633, 'timestamp': '2025-10-01 04:30:26.483006', 'step': 14077, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:30:26.513626', 'step': 14077, 'epoch': 2} {'type': 'loss', 'content': 0.03225196525454521, 'timestamp': '2025-10-01 04:30:26.520953', 'step': 14078, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:26.551135', 'step': 14078, 'epoch': 2} {'type': 'loss', 'content': 0.08514957875013351, 'timestamp': '2025-10-01 04:30:26.553148', 'step': 14079, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:26.583785', 'step': 14079, 'epoch': 2} {'type': 'loss', 'content': 0.06969158351421356, 'timestamp': '2025-10-01 04:30:26.607366', 'step': 14080, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:26.637909', 'step': 14080, 'epoch': 2} {'type': 'loss', 'content': 0.08375135809183121, 'timestamp': '2025-10-01 04:30:26.639919', 'step': 14081, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:30:26.677353', 'step': 14081, 'epoch': 2} {'type': 'loss', 'content': 0.10891848802566528, 'timestamp': '2025-10-01 04:30:26.679508', 'step': 14082, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:26.709869', 'step': 14082, 'epoch': 2} {'type': 'loss', 'content': 0.09195945411920547, 'timestamp': '2025-10-01 04:30:26.712013', 'step': 14083, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:26.742403', 'step': 14083, 'epoch': 2} {'type': 'loss', 'content': 0.0677867978811264, 'timestamp': '2025-10-01 04:30:26.766022', 'step': 14084, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:30:26.796033', 'step': 14084, 'epoch': 2} {'type': 'loss', 'content': 0.14799147844314575, 'timestamp': '2025-10-01 04:30:26.798105', 'step': 14085, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:26.830967', 'step': 14085, 'epoch': 2} {'type': 'loss', 'content': 0.08177372813224792, 'timestamp': '2025-10-01 04:30:26.832963', 'step': 14086, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:26.863998', 'step': 14086, 'epoch': 2} {'type': 'loss', 'content': 0.042844630777835846, 'timestamp': '2025-10-01 04:30:26.866182', 'step': 14087, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:26.896578', 'step': 14087, 'epoch': 2} {'type': 'loss', 'content': 0.10441207885742188, 'timestamp': '2025-10-01 04:30:26.920276', 'step': 14088, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:26.951166', 'step': 14088, 'epoch': 2} {'type': 'loss', 'content': 0.17227579653263092, 'timestamp': '2025-10-01 04:30:26.953232', 'step': 14089, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:26.984586', 'step': 14089, 'epoch': 2} {'type': 'loss', 'content': 0.11433370411396027, 'timestamp': '2025-10-01 04:30:26.986607', 'step': 14090, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:27.017415', 'step': 14090, 'epoch': 2} {'type': 'loss', 'content': 0.09637562930583954, 'timestamp': '2025-10-01 04:30:27.019543', 'step': 14091, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:27.051745', 'step': 14091, 'epoch': 2} {'type': 'loss', 'content': 0.08877645432949066, 'timestamp': '2025-10-01 04:30:27.075368', 'step': 14092, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:27.107272', 'step': 14092, 'epoch': 2} {'type': 'loss', 'content': 0.0882553830742836, 'timestamp': '2025-10-01 04:30:27.109645', 'step': 14093, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:30:27.140302', 'step': 14093, 'epoch': 2} {'type': 'loss', 'content': 0.09289588034152985, 'timestamp': '2025-10-01 04:30:27.142515', 'step': 14094, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:27.182067', 'step': 14094, 'epoch': 2} {'type': 'loss', 'content': 0.132691890001297, 'timestamp': '2025-10-01 04:30:27.183988', 'step': 14095, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:27.214014', 'step': 14095, 'epoch': 2} {'type': 'loss', 'content': 0.08598605543375015, 'timestamp': '2025-10-01 04:30:27.237588', 'step': 14096, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:27.270561', 'step': 14096, 'epoch': 2} {'type': 'loss', 'content': 0.022720005363225937, 'timestamp': '2025-10-01 04:30:27.272733', 'step': 14097, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:30:27.303780', 'step': 14097, 'epoch': 2} {'type': 'loss', 'content': 0.08076849579811096, 'timestamp': '2025-10-01 04:30:27.305867', 'step': 14098, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:30:27.336522', 'step': 14098, 'epoch': 2} {'type': 'loss', 'content': 0.10105488449335098, 'timestamp': '2025-10-01 04:30:27.338586', 'step': 14099, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:27.370264', 'step': 14099, 'epoch': 2} {'type': 'loss', 'content': 0.13550567626953125, 'timestamp': '2025-10-01 04:30:27.393816', 'step': 14100, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:30:27.424378', 'step': 14100, 'epoch': 2} {'type': 'loss', 'content': 0.18162524700164795, 'timestamp': '2025-10-01 04:30:27.426405', 'step': 14101, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:27.457144', 'step': 14101, 'epoch': 2} {'type': 'loss', 'content': 0.03829801082611084, 'timestamp': '2025-10-01 04:30:27.459178', 'step': 14102, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:30:27.489547', 'step': 14102, 'epoch': 2} {'type': 'loss', 'content': 0.1552596539258957, 'timestamp': '2025-10-01 04:30:27.494058', 'step': 14103, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:27.524595', 'step': 14103, 'epoch': 2} {'type': 'loss', 'content': 0.10645715892314911, 'timestamp': '2025-10-01 04:30:27.548138', 'step': 14104, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:27.579019', 'step': 14104, 'epoch': 2} {'type': 'loss', 'content': 0.05258253961801529, 'timestamp': '2025-10-01 04:30:27.580890', 'step': 14105, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:27.611428', 'step': 14105, 'epoch': 2} {'type': 'loss', 'content': 0.041320424526929855, 'timestamp': '2025-10-01 04:30:27.613408', 'step': 14106, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:30:27.643523', 'step': 14106, 'epoch': 2} {'type': 'loss', 'content': 0.037246014922857285, 'timestamp': '2025-10-01 04:30:27.645536', 'step': 14107, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:27.675872', 'step': 14107, 'epoch': 2} {'type': 'loss', 'content': 0.05972594767808914, 'timestamp': '2025-10-01 04:30:27.699837', 'step': 14108, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:30:27.731235', 'step': 14108, 'epoch': 2} {'type': 'loss', 'content': 0.1703609973192215, 'timestamp': '2025-10-01 04:30:27.733254', 'step': 14109, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:27.763919', 'step': 14109, 'epoch': 2} {'type': 'loss', 'content': 0.10235271602869034, 'timestamp': '2025-10-01 04:30:27.765933', 'step': 14110, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:27.796406', 'step': 14110, 'epoch': 2} {'type': 'loss', 'content': 0.055171314626932144, 'timestamp': '2025-10-01 04:30:27.798869', 'step': 14111, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:30:27.831885', 'step': 14111, 'epoch': 2} {'type': 'loss', 'content': 0.1252865493297577, 'timestamp': '2025-10-01 04:30:27.855685', 'step': 14112, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:27.886359', 'step': 14112, 'epoch': 2} {'type': 'loss', 'content': 0.09733285754919052, 'timestamp': '2025-10-01 04:30:27.888497', 'step': 14113, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:30:27.918469', 'step': 14113, 'epoch': 2} {'type': 'loss', 'content': 0.07785974442958832, 'timestamp': '2025-10-01 04:30:27.920661', 'step': 14114, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:27.951242', 'step': 14114, 'epoch': 2} {'type': 'loss', 'content': 0.1424914449453354, 'timestamp': '2025-10-01 04:30:27.953351', 'step': 14115, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:27.984028', 'step': 14115, 'epoch': 2} {'type': 'loss', 'content': 0.09599553793668747, 'timestamp': '2025-10-01 04:30:28.007601', 'step': 14116, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:30:28.038244', 'step': 14116, 'epoch': 2} {'type': 'loss', 'content': 0.07748857140541077, 'timestamp': '2025-10-01 04:30:28.040276', 'step': 14117, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:30:28.071942', 'step': 14117, 'epoch': 2} {'type': 'loss', 'content': 0.16733987629413605, 'timestamp': '2025-10-01 04:30:28.074201', 'step': 14118, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:28.104880', 'step': 14118, 'epoch': 2} {'type': 'loss', 'content': 0.07723412662744522, 'timestamp': '2025-10-01 04:30:28.106935', 'step': 14119, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:28.137677', 'step': 14119, 'epoch': 2} {'type': 'loss', 'content': 0.1337963491678238, 'timestamp': '2025-10-01 04:30:28.161358', 'step': 14120, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:28.191975', 'step': 14120, 'epoch': 2} {'type': 'loss', 'content': 0.022831527516245842, 'timestamp': '2025-10-01 04:30:28.193970', 'step': 14121, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:30:28.224453', 'step': 14121, 'epoch': 2} {'type': 'loss', 'content': 0.06389539688825607, 'timestamp': '2025-10-01 04:30:28.226854', 'step': 14122, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:28.256932', 'step': 14122, 'epoch': 2} {'type': 'loss', 'content': 0.04574566334486008, 'timestamp': '2025-10-01 04:30:28.259042', 'step': 14123, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:28.297681', 'step': 14123, 'epoch': 2} {'type': 'loss', 'content': 0.09235037863254547, 'timestamp': '2025-10-01 04:30:28.321153', 'step': 14124, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:28.351249', 'step': 14124, 'epoch': 2} {'type': 'loss', 'content': 0.041740935295820236, 'timestamp': '2025-10-01 04:30:28.353071', 'step': 14125, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:30:28.388452', 'step': 14125, 'epoch': 2} {'type': 'loss', 'content': 0.0409969724714756, 'timestamp': '2025-10-01 04:30:28.390637', 'step': 14126, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:30:28.421380', 'step': 14126, 'epoch': 2} {'type': 'loss', 'content': 0.2774120271205902, 'timestamp': '2025-10-01 04:30:28.423518', 'step': 14127, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:28.484282', 'step': 14127, 'epoch': 2} {'type': 'loss', 'content': 0.09857147932052612, 'timestamp': '2025-10-01 04:30:28.522357', 'step': 14128, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:30:28.552710', 'step': 14128, 'epoch': 2} {'type': 'loss', 'content': 0.07973083108663559, 'timestamp': '2025-10-01 04:30:28.555441', 'step': 14129, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:28.599892', 'step': 14129, 'epoch': 2} {'type': 'loss', 'content': 0.0901089683175087, 'timestamp': '2025-10-01 04:30:28.601917', 'step': 14130, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:28.632369', 'step': 14130, 'epoch': 2} {'type': 'loss', 'content': 0.008655915968120098, 'timestamp': '2025-10-01 04:30:28.650357', 'step': 14131, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:28.681794', 'step': 14131, 'epoch': 2} {'type': 'loss', 'content': 0.058638229966163635, 'timestamp': '2025-10-01 04:30:28.705438', 'step': 14132, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:30:28.758077', 'step': 14132, 'epoch': 2} {'type': 'loss', 'content': 0.0937875360250473, 'timestamp': '2025-10-01 04:30:28.760090', 'step': 14133, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:30:28.790330', 'step': 14133, 'epoch': 2} {'type': 'loss', 'content': 0.1730322688817978, 'timestamp': '2025-10-01 04:30:28.792649', 'step': 14134, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:30:28.823551', 'step': 14134, 'epoch': 2} {'type': 'loss', 'content': 0.1350739300251007, 'timestamp': '2025-10-01 04:30:28.833574', 'step': 14135, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:30:28.873668', 'step': 14135, 'epoch': 2} {'type': 'loss', 'content': 0.08505114167928696, 'timestamp': '2025-10-01 04:30:28.910822', 'step': 14136, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:30:28.947800', 'step': 14136, 'epoch': 2} {'type': 'loss', 'content': 0.057085826992988586, 'timestamp': '2025-10-01 04:30:28.951506', 'step': 14137, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:28.992746', 'step': 14137, 'epoch': 2} {'type': 'loss', 'content': 0.102841317653656, 'timestamp': '2025-10-01 04:30:28.995543', 'step': 14138, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:29.025921', 'step': 14138, 'epoch': 2} {'type': 'loss', 'content': 0.20673611760139465, 'timestamp': '2025-10-01 04:30:29.041119', 'step': 14139, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:30:29.076738', 'step': 14139, 'epoch': 2} {'type': 'loss', 'content': 0.07932176440954208, 'timestamp': '2025-10-01 04:30:29.101396', 'step': 14140, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:30:29.138153', 'step': 14140, 'epoch': 2} {'type': 'loss', 'content': 0.06350978463888168, 'timestamp': '2025-10-01 04:30:29.144430', 'step': 14141, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:29.191140', 'step': 14141, 'epoch': 2} {'type': 'loss', 'content': 0.09406885504722595, 'timestamp': '2025-10-01 04:30:29.193305', 'step': 14142, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:29.224816', 'step': 14142, 'epoch': 2} {'type': 'loss', 'content': 0.15421739220619202, 'timestamp': '2025-10-01 04:30:29.227330', 'step': 14143, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:30:29.259118', 'step': 14143, 'epoch': 2} {'type': 'loss', 'content': 0.11738375574350357, 'timestamp': '2025-10-01 04:30:29.283532', 'step': 14144, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:30:29.321502', 'step': 14144, 'epoch': 2} {'type': 'loss', 'content': 0.14335474371910095, 'timestamp': '2025-10-01 04:30:29.330558', 'step': 14145, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:29.365442', 'step': 14145, 'epoch': 2} {'type': 'loss', 'content': 0.05113091319799423, 'timestamp': '2025-10-01 04:30:29.374176', 'step': 14146, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:29.435180', 'step': 14146, 'epoch': 2} {'type': 'loss', 'content': 0.057415105402469635, 'timestamp': '2025-10-01 04:30:29.442545', 'step': 14147, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:29.481953', 'step': 14147, 'epoch': 2} {'type': 'loss', 'content': 0.08466361463069916, 'timestamp': '2025-10-01 04:30:29.510979', 'step': 14148, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:29.543571', 'step': 14148, 'epoch': 2} {'type': 'loss', 'content': 0.16087879240512848, 'timestamp': '2025-10-01 04:30:29.546387', 'step': 14149, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:30:29.594442', 'step': 14149, 'epoch': 2} {'type': 'loss', 'content': 0.17840875685214996, 'timestamp': '2025-10-01 04:30:29.597006', 'step': 14150, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:29.631543', 'step': 14150, 'epoch': 2} {'type': 'loss', 'content': 0.11474357545375824, 'timestamp': '2025-10-01 04:30:29.634868', 'step': 14151, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:29.670895', 'step': 14151, 'epoch': 2} {'type': 'loss', 'content': 0.052981868386268616, 'timestamp': '2025-10-01 04:30:29.695280', 'step': 14152, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:29.740143', 'step': 14152, 'epoch': 2} {'type': 'loss', 'content': 0.1068633422255516, 'timestamp': '2025-10-01 04:30:29.742230', 'step': 14153, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:30:29.775631', 'step': 14153, 'epoch': 2} {'type': 'loss', 'content': 0.07212306559085846, 'timestamp': '2025-10-01 04:30:29.778931', 'step': 14154, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:29.827094', 'step': 14154, 'epoch': 2} {'type': 'loss', 'content': 0.0840047299861908, 'timestamp': '2025-10-01 04:30:29.831217', 'step': 14155, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:30:29.867908', 'step': 14155, 'epoch': 2} {'type': 'loss', 'content': 0.10576324164867401, 'timestamp': '2025-10-01 04:30:29.893256', 'step': 14156, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:29.937705', 'step': 14156, 'epoch': 2} {'type': 'loss', 'content': 0.13280199468135834, 'timestamp': '2025-10-01 04:30:29.940152', 'step': 14157, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:30:29.983282', 'step': 14157, 'epoch': 2} {'type': 'loss', 'content': 0.12491860240697861, 'timestamp': '2025-10-01 04:30:29.986581', 'step': 14158, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:30:30.020079', 'step': 14158, 'epoch': 2} {'type': 'loss', 'content': 0.11113865673542023, 'timestamp': '2025-10-01 04:30:30.022397', 'step': 14159, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:30:30.054603', 'step': 14159, 'epoch': 2} {'type': 'loss', 'content': 0.07231541723012924, 'timestamp': '2025-10-01 04:30:30.078233', 'step': 14160, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:30.110337', 'step': 14160, 'epoch': 2} {'type': 'loss', 'content': 0.08733087033033371, 'timestamp': '2025-10-01 04:30:30.112299', 'step': 14161, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:30:30.143923', 'step': 14161, 'epoch': 2} {'type': 'loss', 'content': 0.11660541594028473, 'timestamp': '2025-10-01 04:30:30.146155', 'step': 14162, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:30:30.188573', 'step': 14162, 'epoch': 2} {'type': 'loss', 'content': 0.07073542475700378, 'timestamp': '2025-10-01 04:30:30.191018', 'step': 14163, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:30.223818', 'step': 14163, 'epoch': 2} {'type': 'loss', 'content': 0.06026771664619446, 'timestamp': '2025-10-01 04:30:30.249708', 'step': 14164, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:30.281144', 'step': 14164, 'epoch': 2} {'type': 'loss', 'content': 0.16195856034755707, 'timestamp': '2025-10-01 04:30:30.283195', 'step': 14165, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:30:30.324325', 'step': 14165, 'epoch': 2} {'type': 'loss', 'content': 0.12041600793600082, 'timestamp': '2025-10-01 04:30:30.328625', 'step': 14166, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:30.364729', 'step': 14166, 'epoch': 2} {'type': 'loss', 'content': 0.13187237083911896, 'timestamp': '2025-10-01 04:30:30.366837', 'step': 14167, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:30:30.399021', 'step': 14167, 'epoch': 2} {'type': 'loss', 'content': 0.09121060371398926, 'timestamp': '2025-10-01 04:30:30.422560', 'step': 14168, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:30:30.455131', 'step': 14168, 'epoch': 2} {'type': 'loss', 'content': 0.032989755272865295, 'timestamp': '2025-10-01 04:30:30.457246', 'step': 14169, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:30:30.493149', 'step': 14169, 'epoch': 2} {'type': 'loss', 'content': 0.11490341275930405, 'timestamp': '2025-10-01 04:30:30.495385', 'step': 14170, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:30:30.531402', 'step': 14170, 'epoch': 2} {'type': 'loss', 'content': 0.18666291236877441, 'timestamp': '2025-10-01 04:30:30.533863', 'step': 14171, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:30.565909', 'step': 14171, 'epoch': 2} {'type': 'loss', 'content': 0.10287216305732727, 'timestamp': '2025-10-01 04:30:30.589416', 'step': 14172, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:30:30.621394', 'step': 14172, 'epoch': 2} {'type': 'loss', 'content': 0.057183120399713516, 'timestamp': '2025-10-01 04:30:30.623500', 'step': 14173, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:30.656574', 'step': 14173, 'epoch': 2} {'type': 'loss', 'content': 0.15683527290821075, 'timestamp': '2025-10-01 04:30:30.658761', 'step': 14174, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:30:30.691592', 'step': 14174, 'epoch': 2} {'type': 'loss', 'content': 0.10987414419651031, 'timestamp': '2025-10-01 04:30:30.694204', 'step': 14175, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:30.734213', 'step': 14175, 'epoch': 2} {'type': 'loss', 'content': 0.15554186701774597, 'timestamp': '2025-10-01 04:30:30.757791', 'step': 14176, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-10-01 04:30:30.796904', 'step': 14176, 'epoch': 2} {'type': 'loss', 'content': 0.14413999021053314, 'timestamp': '2025-10-01 04:30:30.801958', 'step': 14177, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:30.842620', 'step': 14177, 'epoch': 2} {'type': 'loss', 'content': 0.13140994310379028, 'timestamp': '2025-10-01 04:30:30.844731', 'step': 14178, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:30:30.877965', 'step': 14178, 'epoch': 2} {'type': 'loss', 'content': 0.11903687566518784, 'timestamp': '2025-10-01 04:30:30.881648', 'step': 14179, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:30.915329', 'step': 14179, 'epoch': 2} {'type': 'loss', 'content': 0.12590697407722473, 'timestamp': '2025-10-01 04:30:30.938861', 'step': 14180, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:30.972196', 'step': 14180, 'epoch': 2} {'type': 'loss', 'content': 0.09132358431816101, 'timestamp': '2025-10-01 04:30:30.974264', 'step': 14181, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:30:31.010052', 'step': 14181, 'epoch': 2} {'type': 'loss', 'content': 0.18287265300750732, 'timestamp': '2025-10-01 04:30:31.012097', 'step': 14182, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:31.050414', 'step': 14182, 'epoch': 2} {'type': 'loss', 'content': 0.16039717197418213, 'timestamp': '2025-10-01 04:30:31.052563', 'step': 14183, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:31.093829', 'step': 14183, 'epoch': 2} {'type': 'loss', 'content': 0.07809333503246307, 'timestamp': '2025-10-01 04:30:31.118034', 'step': 14184, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:31.150480', 'step': 14184, 'epoch': 2} {'type': 'loss', 'content': 0.05525016039609909, 'timestamp': '2025-10-01 04:30:31.155411', 'step': 14185, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:31.189002', 'step': 14185, 'epoch': 2} {'type': 'loss', 'content': 0.040268074721097946, 'timestamp': '2025-10-01 04:30:31.191754', 'step': 14186, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:30:31.232977', 'step': 14186, 'epoch': 2} {'type': 'loss', 'content': 0.06251035630702972, 'timestamp': '2025-10-01 04:30:31.236495', 'step': 14187, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:31.268907', 'step': 14187, 'epoch': 2} {'type': 'loss', 'content': 0.08090904355049133, 'timestamp': '2025-10-01 04:30:31.293094', 'step': 14188, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:30:31.324884', 'step': 14188, 'epoch': 2} {'type': 'loss', 'content': 0.11025803536176682, 'timestamp': '2025-10-01 04:30:31.327413', 'step': 14189, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:31.360109', 'step': 14189, 'epoch': 2} {'type': 'loss', 'content': 0.19215968251228333, 'timestamp': '2025-10-01 04:30:31.362380', 'step': 14190, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:30:31.394695', 'step': 14190, 'epoch': 2} {'type': 'loss', 'content': 0.14540544152259827, 'timestamp': '2025-10-01 04:30:31.396820', 'step': 14191, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:31.429622', 'step': 14191, 'epoch': 2} {'type': 'loss', 'content': 0.11884072422981262, 'timestamp': '2025-10-01 04:30:31.453425', 'step': 14192, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:31.492618', 'step': 14192, 'epoch': 2} {'type': 'loss', 'content': 0.05149468407034874, 'timestamp': '2025-10-01 04:30:31.495029', 'step': 14193, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:31.536015', 'step': 14193, 'epoch': 2} {'type': 'loss', 'content': 0.10617959499359131, 'timestamp': '2025-10-01 04:30:31.539495', 'step': 14194, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:31.584121', 'step': 14194, 'epoch': 2} {'type': 'loss', 'content': 0.07922551035881042, 'timestamp': '2025-10-01 04:30:31.586617', 'step': 14195, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:31.633308', 'step': 14195, 'epoch': 2} {'type': 'loss', 'content': 0.08219025284051895, 'timestamp': '2025-10-01 04:30:31.657119', 'step': 14196, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:31.687977', 'step': 14196, 'epoch': 2} {'type': 'loss', 'content': 0.13430173695087433, 'timestamp': '2025-10-01 04:30:31.690307', 'step': 14197, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:31.731608', 'step': 14197, 'epoch': 2} {'type': 'loss', 'content': 0.1459478884935379, 'timestamp': '2025-10-01 04:30:31.734132', 'step': 14198, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:31.772681', 'step': 14198, 'epoch': 2} {'type': 'loss', 'content': 0.09568129479885101, 'timestamp': '2025-10-01 04:30:31.775022', 'step': 14199, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:30:31.807006', 'step': 14199, 'epoch': 2} {'type': 'loss', 'content': 0.09976162761449814, 'timestamp': '2025-10-01 04:30:31.830617', 'step': 14200, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:31.862788', 'step': 14200, 'epoch': 2} {'type': 'loss', 'content': 0.13575901091098785, 'timestamp': '2025-10-01 04:30:31.865356', 'step': 14201, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:30:31.897683', 'step': 14201, 'epoch': 2} {'type': 'loss', 'content': 0.06888165324926376, 'timestamp': '2025-10-01 04:30:31.900495', 'step': 14202, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:30:31.932451', 'step': 14202, 'epoch': 2} {'type': 'loss', 'content': 0.10427649319171906, 'timestamp': '2025-10-01 04:30:31.934818', 'step': 14203, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:31.966684', 'step': 14203, 'epoch': 2} {'type': 'loss', 'content': 0.08688055723905563, 'timestamp': '2025-10-01 04:30:31.990472', 'step': 14204, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:30:32.032609', 'step': 14204, 'epoch': 2} {'type': 'loss', 'content': 0.08700268715620041, 'timestamp': '2025-10-01 04:30:32.038403', 'step': 14205, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:32.093914', 'step': 14205, 'epoch': 2} {'type': 'loss', 'content': 0.07605115324258804, 'timestamp': '2025-10-01 04:30:32.096195', 'step': 14206, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:30:32.129362', 'step': 14206, 'epoch': 2} {'type': 'loss', 'content': 0.1414736658334732, 'timestamp': '2025-10-01 04:30:32.131958', 'step': 14207, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:30:32.171587', 'step': 14207, 'epoch': 2} {'type': 'loss', 'content': 0.1325254887342453, 'timestamp': '2025-10-01 04:30:32.195584', 'step': 14208, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:32.243418', 'step': 14208, 'epoch': 2} {'type': 'loss', 'content': 0.049790214747190475, 'timestamp': '2025-10-01 04:30:32.246443', 'step': 14209, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:30:32.290990', 'step': 14209, 'epoch': 2} {'type': 'loss', 'content': 0.18484796583652496, 'timestamp': '2025-10-01 04:30:32.293342', 'step': 14210, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:32.333676', 'step': 14210, 'epoch': 2} {'type': 'loss', 'content': 0.057187750935554504, 'timestamp': '2025-10-01 04:30:32.338636', 'step': 14211, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:32.377078', 'step': 14211, 'epoch': 2} {'type': 'loss', 'content': 0.11375705152750015, 'timestamp': '2025-10-01 04:30:32.400960', 'step': 14212, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:30:32.434078', 'step': 14212, 'epoch': 2} {'type': 'loss', 'content': 0.12428484112024307, 'timestamp': '2025-10-01 04:30:32.436255', 'step': 14213, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:30:32.470193', 'step': 14213, 'epoch': 2} {'type': 'loss', 'content': 0.06557457894086838, 'timestamp': '2025-10-01 04:30:32.472227', 'step': 14214, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:32.505252', 'step': 14214, 'epoch': 2} {'type': 'loss', 'content': 0.08001145720481873, 'timestamp': '2025-10-01 04:30:32.507560', 'step': 14215, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:30:32.558723', 'step': 14215, 'epoch': 2} {'type': 'loss', 'content': 0.12580269575119019, 'timestamp': '2025-10-01 04:30:32.582229', 'step': 14216, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:32.623073', 'step': 14216, 'epoch': 2} {'type': 'loss', 'content': 0.10590691864490509, 'timestamp': '2025-10-01 04:30:32.625132', 'step': 14217, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:32.696396', 'step': 14217, 'epoch': 2} {'type': 'loss', 'content': 0.03016151487827301, 'timestamp': '2025-10-01 04:30:32.698847', 'step': 14218, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:32.732610', 'step': 14218, 'epoch': 2} {'type': 'loss', 'content': 0.08653956651687622, 'timestamp': '2025-10-01 04:30:32.734598', 'step': 14219, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:32.782949', 'step': 14219, 'epoch': 2} {'type': 'loss', 'content': 0.07308907061815262, 'timestamp': '2025-10-01 04:30:32.806499', 'step': 14220, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:32.839326', 'step': 14220, 'epoch': 2} {'type': 'loss', 'content': 0.13754574954509735, 'timestamp': '2025-10-01 04:30:32.841356', 'step': 14221, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:32.873411', 'step': 14221, 'epoch': 2} {'type': 'loss', 'content': 0.12217622250318527, 'timestamp': '2025-10-01 04:30:32.880677', 'step': 14222, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:30:32.916527', 'step': 14222, 'epoch': 2} {'type': 'loss', 'content': 0.07984941452741623, 'timestamp': '2025-10-01 04:30:32.918977', 'step': 14223, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:32.955384', 'step': 14223, 'epoch': 2} {'type': 'loss', 'content': 0.07243310660123825, 'timestamp': '2025-10-01 04:30:32.979369', 'step': 14224, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:33.030479', 'step': 14224, 'epoch': 2} {'type': 'loss', 'content': 0.13722288608551025, 'timestamp': '2025-10-01 04:30:33.032470', 'step': 14225, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:30:33.066163', 'step': 14225, 'epoch': 2} {'type': 'loss', 'content': 0.10653410851955414, 'timestamp': '2025-10-01 04:30:33.068256', 'step': 14226, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:33.105706', 'step': 14226, 'epoch': 2} {'type': 'loss', 'content': 0.10790914297103882, 'timestamp': '2025-10-01 04:30:33.108414', 'step': 14227, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:33.160566', 'step': 14227, 'epoch': 2} {'type': 'loss', 'content': 0.15676651895046234, 'timestamp': '2025-10-01 04:30:33.184262', 'step': 14228, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:33.225904', 'step': 14228, 'epoch': 2} {'type': 'loss', 'content': 0.038841329514980316, 'timestamp': '2025-10-01 04:30:33.227886', 'step': 14229, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:33.260675', 'step': 14229, 'epoch': 2} {'type': 'loss', 'content': 0.18275459110736847, 'timestamp': '2025-10-01 04:30:33.263602', 'step': 14230, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:30:33.299380', 'step': 14230, 'epoch': 2} {'type': 'loss', 'content': 0.0776316449046135, 'timestamp': '2025-10-01 04:30:33.301804', 'step': 14231, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:33.336198', 'step': 14231, 'epoch': 2} {'type': 'loss', 'content': 0.1196100115776062, 'timestamp': '2025-10-01 04:30:33.359688', 'step': 14232, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:30:33.404689', 'step': 14232, 'epoch': 2} {'type': 'loss', 'content': 0.063100665807724, 'timestamp': '2025-10-01 04:30:33.406793', 'step': 14233, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:30:33.461751', 'step': 14233, 'epoch': 2} {'type': 'loss', 'content': 0.14599262177944183, 'timestamp': '2025-10-01 04:30:33.464233', 'step': 14234, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:30:33.498828', 'step': 14234, 'epoch': 2} {'type': 'loss', 'content': 0.03807322680950165, 'timestamp': '2025-10-01 04:30:33.500930', 'step': 14235, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:30:33.557928', 'step': 14235, 'epoch': 2} {'type': 'loss', 'content': 0.0753592848777771, 'timestamp': '2025-10-01 04:30:33.581446', 'step': 14236, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:33.628005', 'step': 14236, 'epoch': 2} {'type': 'loss', 'content': 0.03608649596571922, 'timestamp': '2025-10-01 04:30:33.630123', 'step': 14237, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:33.663113', 'step': 14237, 'epoch': 2} {'type': 'loss', 'content': 0.12408442795276642, 'timestamp': '2025-10-01 04:30:33.665448', 'step': 14238, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:30:33.698694', 'step': 14238, 'epoch': 2} {'type': 'loss', 'content': 0.12197747081518173, 'timestamp': '2025-10-01 04:30:33.700896', 'step': 14239, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:33.733827', 'step': 14239, 'epoch': 2} {'type': 'loss', 'content': 0.11145853996276855, 'timestamp': '2025-10-01 04:30:33.757382', 'step': 14240, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:33.794689', 'step': 14240, 'epoch': 2} {'type': 'loss', 'content': 0.0392395481467247, 'timestamp': '2025-10-01 04:30:33.796647', 'step': 14241, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:33.834067', 'step': 14241, 'epoch': 2} {'type': 'loss', 'content': 0.060519080609083176, 'timestamp': '2025-10-01 04:30:33.836944', 'step': 14242, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:30:33.879164', 'step': 14242, 'epoch': 2} {'type': 'loss', 'content': 0.17284944653511047, 'timestamp': '2025-10-01 04:30:33.881151', 'step': 14243, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:33.912694', 'step': 14243, 'epoch': 2} {'type': 'loss', 'content': 0.10730661451816559, 'timestamp': '2025-10-01 04:30:33.936461', 'step': 14244, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:33.979679', 'step': 14244, 'epoch': 2} {'type': 'loss', 'content': 0.11700676381587982, 'timestamp': '2025-10-01 04:30:33.981663', 'step': 14245, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:34.024208', 'step': 14245, 'epoch': 2} {'type': 'loss', 'content': 0.06351996958255768, 'timestamp': '2025-10-01 04:30:34.026245', 'step': 14246, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:30:34.070590', 'step': 14246, 'epoch': 2} {'type': 'loss', 'content': 0.17251260578632355, 'timestamp': '2025-10-01 04:30:34.073113', 'step': 14247, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:34.108681', 'step': 14247, 'epoch': 2} {'type': 'loss', 'content': 0.09494177252054214, 'timestamp': '2025-10-01 04:30:34.132713', 'step': 14248, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:34.188966', 'step': 14248, 'epoch': 2} {'type': 'loss', 'content': 0.09796851873397827, 'timestamp': '2025-10-01 04:30:34.190917', 'step': 14249, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:34.230117', 'step': 14249, 'epoch': 2} {'type': 'loss', 'content': 0.12399151921272278, 'timestamp': '2025-10-01 04:30:34.232227', 'step': 14250, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:34.277146', 'step': 14250, 'epoch': 2} {'type': 'loss', 'content': 0.08273588120937347, 'timestamp': '2025-10-01 04:30:34.279172', 'step': 14251, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:30:34.327706', 'step': 14251, 'epoch': 2} {'type': 'loss', 'content': 0.07280777394771576, 'timestamp': '2025-10-01 04:30:34.351376', 'step': 14252, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:30:34.401264', 'step': 14252, 'epoch': 2} {'type': 'loss', 'content': 0.07708913832902908, 'timestamp': '2025-10-01 04:30:34.403404', 'step': 14253, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:30:34.439282', 'step': 14253, 'epoch': 2} {'type': 'loss', 'content': 0.089966781437397, 'timestamp': '2025-10-01 04:30:34.441466', 'step': 14254, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:34.477532', 'step': 14254, 'epoch': 2} {'type': 'loss', 'content': 0.06996412575244904, 'timestamp': '2025-10-01 04:30:34.479973', 'step': 14255, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:34.514733', 'step': 14255, 'epoch': 2} {'type': 'loss', 'content': 0.1147170141339302, 'timestamp': '2025-10-01 04:30:34.538879', 'step': 14256, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:34.579484', 'step': 14256, 'epoch': 2} {'type': 'loss', 'content': 0.1337834596633911, 'timestamp': '2025-10-01 04:30:34.581698', 'step': 14257, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:34.614156', 'step': 14257, 'epoch': 2} {'type': 'loss', 'content': 0.16964863240718842, 'timestamp': '2025-10-01 04:30:34.616178', 'step': 14258, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:34.650270', 'step': 14258, 'epoch': 2} {'type': 'loss', 'content': 0.11837965250015259, 'timestamp': '2025-10-01 04:30:34.663473', 'step': 14259, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:30:34.700462', 'step': 14259, 'epoch': 2} {'type': 'loss', 'content': 0.1364385038614273, 'timestamp': '2025-10-01 04:30:34.723941', 'step': 14260, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:34.758596', 'step': 14260, 'epoch': 2} {'type': 'loss', 'content': 0.1280355006456375, 'timestamp': '2025-10-01 04:30:34.760637', 'step': 14261, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:34.798060', 'step': 14261, 'epoch': 2} {'type': 'loss', 'content': 0.17896118760108948, 'timestamp': '2025-10-01 04:30:34.800151', 'step': 14262, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:34.837698', 'step': 14262, 'epoch': 2} {'type': 'loss', 'content': 0.1501559019088745, 'timestamp': '2025-10-01 04:30:34.839800', 'step': 14263, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:34.874794', 'step': 14263, 'epoch': 2} {'type': 'loss', 'content': 0.0860002189874649, 'timestamp': '2025-10-01 04:30:34.898289', 'step': 14264, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:34.937221', 'step': 14264, 'epoch': 2} {'type': 'loss', 'content': 0.14474837481975555, 'timestamp': '2025-10-01 04:30:34.940118', 'step': 14265, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:30:34.980298', 'step': 14265, 'epoch': 2} {'type': 'loss', 'content': 0.07544097304344177, 'timestamp': '2025-10-01 04:30:34.982319', 'step': 14266, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:35.020594', 'step': 14266, 'epoch': 2} {'type': 'loss', 'content': 0.04461867734789848, 'timestamp': '2025-10-01 04:30:35.022714', 'step': 14267, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:30:35.060642', 'step': 14267, 'epoch': 2} {'type': 'loss', 'content': 0.14816327393054962, 'timestamp': '2025-10-01 04:30:35.084193', 'step': 14268, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:30:35.119999', 'step': 14268, 'epoch': 2} {'type': 'loss', 'content': 0.08266221731901169, 'timestamp': '2025-10-01 04:30:35.122093', 'step': 14269, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:35.155797', 'step': 14269, 'epoch': 2} {'type': 'loss', 'content': 0.13792911171913147, 'timestamp': '2025-10-01 04:30:35.157966', 'step': 14270, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:35.208999', 'step': 14270, 'epoch': 2} {'type': 'loss', 'content': 0.08088655769824982, 'timestamp': '2025-10-01 04:30:35.211174', 'step': 14271, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:30:35.244632', 'step': 14271, 'epoch': 2} {'type': 'loss', 'content': 0.14508980512619019, 'timestamp': '2025-10-01 04:30:35.268142', 'step': 14272, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:35.313244', 'step': 14272, 'epoch': 2} {'type': 'loss', 'content': 0.0827707052230835, 'timestamp': '2025-10-01 04:30:35.315226', 'step': 14273, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:35.357987', 'step': 14273, 'epoch': 2} {'type': 'loss', 'content': 0.0492384135723114, 'timestamp': '2025-10-01 04:30:35.361106', 'step': 14274, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:35.393958', 'step': 14274, 'epoch': 2} {'type': 'loss', 'content': 0.07375435531139374, 'timestamp': '2025-10-01 04:30:35.396216', 'step': 14275, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:35.430728', 'step': 14275, 'epoch': 2} {'type': 'loss', 'content': 0.08786094933748245, 'timestamp': '2025-10-01 04:30:35.454342', 'step': 14276, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:35.491774', 'step': 14276, 'epoch': 2} {'type': 'loss', 'content': 0.08145526051521301, 'timestamp': '2025-10-01 04:30:35.494239', 'step': 14277, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:35.529063', 'step': 14277, 'epoch': 2} {'type': 'loss', 'content': 0.12281037122011185, 'timestamp': '2025-10-01 04:30:35.531075', 'step': 14278, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:35.565045', 'step': 14278, 'epoch': 2} {'type': 'loss', 'content': 0.13620911538600922, 'timestamp': '2025-10-01 04:30:35.566930', 'step': 14279, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:35.610157', 'step': 14279, 'epoch': 2} {'type': 'loss', 'content': 0.055724069476127625, 'timestamp': '2025-10-01 04:30:35.633644', 'step': 14280, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:35.678689', 'step': 14280, 'epoch': 2} {'type': 'loss', 'content': 0.12021665275096893, 'timestamp': '2025-10-01 04:30:35.680745', 'step': 14281, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:30:35.713798', 'step': 14281, 'epoch': 2} {'type': 'loss', 'content': 0.09879010915756226, 'timestamp': '2025-10-01 04:30:35.715878', 'step': 14282, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:30:35.749193', 'step': 14282, 'epoch': 2} {'type': 'loss', 'content': 0.08072318136692047, 'timestamp': '2025-10-01 04:30:35.752072', 'step': 14283, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:30:35.785646', 'step': 14283, 'epoch': 2} {'type': 'loss', 'content': 0.11007750034332275, 'timestamp': '2025-10-01 04:30:35.809370', 'step': 14284, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:35.849876', 'step': 14284, 'epoch': 2} {'type': 'loss', 'content': 0.08089759945869446, 'timestamp': '2025-10-01 04:30:35.852148', 'step': 14285, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:30:35.886561', 'step': 14285, 'epoch': 2} {'type': 'loss', 'content': 0.1525367945432663, 'timestamp': '2025-10-01 04:30:35.888718', 'step': 14286, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:35.921714', 'step': 14286, 'epoch': 2} {'type': 'loss', 'content': 0.1310621201992035, 'timestamp': '2025-10-01 04:30:35.923808', 'step': 14287, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:30:35.963695', 'step': 14287, 'epoch': 2} {'type': 'loss', 'content': 0.07057171314954758, 'timestamp': '2025-10-01 04:30:35.987226', 'step': 14288, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:36.028968', 'step': 14288, 'epoch': 2} {'type': 'loss', 'content': 0.08687921613454819, 'timestamp': '2025-10-01 04:30:36.031037', 'step': 14289, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:30:36.072396', 'step': 14289, 'epoch': 2} {'type': 'loss', 'content': 0.07633143663406372, 'timestamp': '2025-10-01 04:30:36.075256', 'step': 14290, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:36.116562', 'step': 14290, 'epoch': 2} {'type': 'loss', 'content': 0.11469579488039017, 'timestamp': '2025-10-01 04:30:36.118541', 'step': 14291, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:36.166707', 'step': 14291, 'epoch': 2} {'type': 'loss', 'content': 0.05263623595237732, 'timestamp': '2025-10-01 04:30:36.190786', 'step': 14292, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:30:36.234521', 'step': 14292, 'epoch': 2} {'type': 'loss', 'content': 0.1194806769490242, 'timestamp': '2025-10-01 04:30:36.236491', 'step': 14293, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:30:36.269395', 'step': 14293, 'epoch': 2} {'type': 'loss', 'content': 0.1045180931687355, 'timestamp': '2025-10-01 04:30:36.274626', 'step': 14294, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:30:36.310321', 'step': 14294, 'epoch': 2} {'type': 'loss', 'content': 0.057974204421043396, 'timestamp': '2025-10-01 04:30:36.312343', 'step': 14295, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:36.347285', 'step': 14295, 'epoch': 2} {'type': 'loss', 'content': 0.08545460551977158, 'timestamp': '2025-10-01 04:30:36.370773', 'step': 14296, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:30:36.413500', 'step': 14296, 'epoch': 2} {'type': 'loss', 'content': 0.14518703520298004, 'timestamp': '2025-10-01 04:30:36.415690', 'step': 14297, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:30:36.459983', 'step': 14297, 'epoch': 2} {'type': 'loss', 'content': 0.09514565765857697, 'timestamp': '2025-10-01 04:30:36.462028', 'step': 14298, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:30:36.504582', 'step': 14298, 'epoch': 2} {'type': 'loss', 'content': 0.10142457485198975, 'timestamp': '2025-10-01 04:30:36.507750', 'step': 14299, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:30:36.541294', 'step': 14299, 'epoch': 2} {'type': 'loss', 'content': 0.07956485450267792, 'timestamp': '2025-10-01 04:30:36.565127', 'step': 14300, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:30:36.603057', 'step': 14300, 'epoch': 2} {'type': 'loss', 'content': 0.08030173182487488, 'timestamp': '2025-10-01 04:30:36.605463', 'step': 14301, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:36.641870', 'step': 14301, 'epoch': 2} {'type': 'loss', 'content': 0.06459737569093704, 'timestamp': '2025-10-01 04:30:36.643915', 'step': 14302, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:30:36.677247', 'step': 14302, 'epoch': 2} {'type': 'loss', 'content': 0.10697965323925018, 'timestamp': '2025-10-01 04:30:36.679441', 'step': 14303, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:36.713852', 'step': 14303, 'epoch': 2} {'type': 'loss', 'content': 0.07700596004724503, 'timestamp': '2025-10-01 04:30:36.737481', 'step': 14304, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:36.803282', 'step': 14304, 'epoch': 2} {'type': 'loss', 'content': 0.11872822791337967, 'timestamp': '2025-10-01 04:30:36.805392', 'step': 14305, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:30:36.855367', 'step': 14305, 'epoch': 2} {'type': 'loss', 'content': 0.07315042614936829, 'timestamp': '2025-10-01 04:30:36.857448', 'step': 14306, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:36.899951', 'step': 14306, 'epoch': 2} {'type': 'loss', 'content': 0.04180043563246727, 'timestamp': '2025-10-01 04:30:36.902157', 'step': 14307, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:36.935900', 'step': 14307, 'epoch': 2} {'type': 'loss', 'content': 0.10740646719932556, 'timestamp': '2025-10-01 04:30:36.959494', 'step': 14308, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:37.004553', 'step': 14308, 'epoch': 2} {'type': 'loss', 'content': 0.10789022594690323, 'timestamp': '2025-10-01 04:30:37.007303', 'step': 14309, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:37.039941', 'step': 14309, 'epoch': 2} {'type': 'loss', 'content': 0.06502877175807953, 'timestamp': '2025-10-01 04:30:37.042454', 'step': 14310, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:37.083700', 'step': 14310, 'epoch': 2} {'type': 'loss', 'content': 0.05033859610557556, 'timestamp': '2025-10-01 04:30:37.085836', 'step': 14311, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:37.119806', 'step': 14311, 'epoch': 2} {'type': 'loss', 'content': 0.06546514481306076, 'timestamp': '2025-10-01 04:30:37.143304', 'step': 14312, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:37.185703', 'step': 14312, 'epoch': 2} {'type': 'loss', 'content': 0.08478987216949463, 'timestamp': '2025-10-01 04:30:37.189128', 'step': 14313, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:30:37.231024', 'step': 14313, 'epoch': 2} {'type': 'loss', 'content': 0.11749500036239624, 'timestamp': '2025-10-01 04:30:37.233466', 'step': 14314, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:37.269499', 'step': 14314, 'epoch': 2} {'type': 'loss', 'content': 0.12652190029621124, 'timestamp': '2025-10-01 04:30:37.271800', 'step': 14315, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:37.308842', 'step': 14315, 'epoch': 2} {'type': 'loss', 'content': 0.09200487285852432, 'timestamp': '2025-10-01 04:30:37.332547', 'step': 14316, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:37.367787', 'step': 14316, 'epoch': 2} {'type': 'loss', 'content': 0.030544571578502655, 'timestamp': '2025-10-01 04:30:37.369732', 'step': 14317, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:30:37.402171', 'step': 14317, 'epoch': 2} {'type': 'loss', 'content': 0.07280754297971725, 'timestamp': '2025-10-01 04:30:37.404258', 'step': 14318, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:30:37.629919', 'step': 14318, 'epoch': 2} {'type': 'loss', 'content': 0.11572988331317902, 'timestamp': '2025-10-01 04:30:37.632096', 'step': 14319, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:37.666730', 'step': 14319, 'epoch': 2} {'type': 'loss', 'content': 0.1121269166469574, 'timestamp': '2025-10-01 04:30:37.690338', 'step': 14320, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:37.734626', 'step': 14320, 'epoch': 2} {'type': 'loss', 'content': 0.11617182195186615, 'timestamp': '2025-10-01 04:30:37.737731', 'step': 14321, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:37.772439', 'step': 14321, 'epoch': 2} {'type': 'loss', 'content': 0.10088043659925461, 'timestamp': '2025-10-01 04:30:37.775023', 'step': 14322, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:30:37.813249', 'step': 14322, 'epoch': 2} {'type': 'loss', 'content': 0.19798146188259125, 'timestamp': '2025-10-01 04:30:37.815386', 'step': 14323, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:30:37.860516', 'step': 14323, 'epoch': 2} {'type': 'loss', 'content': 0.1569317728281021, 'timestamp': '2025-10-01 04:30:37.886274', 'step': 14324, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:37.928883', 'step': 14324, 'epoch': 2} {'type': 'loss', 'content': 0.051667507737874985, 'timestamp': '2025-10-01 04:30:37.931175', 'step': 14325, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:37.966529', 'step': 14325, 'epoch': 2} {'type': 'loss', 'content': 0.05894362926483154, 'timestamp': '2025-10-01 04:30:37.969040', 'step': 14326, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:38.003285', 'step': 14326, 'epoch': 2} {'type': 'loss', 'content': 0.10539203882217407, 'timestamp': '2025-10-01 04:30:38.005388', 'step': 14327, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:38.047248', 'step': 14327, 'epoch': 2} {'type': 'loss', 'content': 0.08900140225887299, 'timestamp': '2025-10-01 04:30:38.071099', 'step': 14328, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:38.119640', 'step': 14328, 'epoch': 2} {'type': 'loss', 'content': 0.12025720626115799, 'timestamp': '2025-10-01 04:30:38.121847', 'step': 14329, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:38.154390', 'step': 14329, 'epoch': 2} {'type': 'loss', 'content': 0.0643651932477951, 'timestamp': '2025-10-01 04:30:38.156516', 'step': 14330, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:30:38.192119', 'step': 14330, 'epoch': 2} {'type': 'loss', 'content': 0.13210099935531616, 'timestamp': '2025-10-01 04:30:38.194167', 'step': 14331, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:30:38.226851', 'step': 14331, 'epoch': 2} {'type': 'loss', 'content': 0.14006082713603973, 'timestamp': '2025-10-01 04:30:38.252277', 'step': 14332, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:38.288417', 'step': 14332, 'epoch': 2} {'type': 'loss', 'content': 0.10904364287853241, 'timestamp': '2025-10-01 04:30:38.293292', 'step': 14333, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:30:38.328062', 'step': 14333, 'epoch': 2} {'type': 'loss', 'content': 0.11014906316995621, 'timestamp': '2025-10-01 04:30:38.330497', 'step': 14334, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:38.375366', 'step': 14334, 'epoch': 2} {'type': 'loss', 'content': 0.1741635501384735, 'timestamp': '2025-10-01 04:30:38.377498', 'step': 14335, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:30:38.409721', 'step': 14335, 'epoch': 2} {'type': 'loss', 'content': 0.0849640816450119, 'timestamp': '2025-10-01 04:30:38.433349', 'step': 14336, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:30:38.466795', 'step': 14336, 'epoch': 2} {'type': 'loss', 'content': 0.1236526146531105, 'timestamp': '2025-10-01 04:30:38.468944', 'step': 14337, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:38.502448', 'step': 14337, 'epoch': 2} {'type': 'loss', 'content': 0.0840444266796112, 'timestamp': '2025-10-01 04:30:38.504529', 'step': 14338, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:38.536373', 'step': 14338, 'epoch': 2} {'type': 'loss', 'content': 0.09793854504823685, 'timestamp': '2025-10-01 04:30:38.538510', 'step': 14339, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:38.580636', 'step': 14339, 'epoch': 2} {'type': 'loss', 'content': 0.09406482428312302, 'timestamp': '2025-10-01 04:30:38.604228', 'step': 14340, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:30:38.639171', 'step': 14340, 'epoch': 2} {'type': 'loss', 'content': 0.08117849379777908, 'timestamp': '2025-10-01 04:30:38.641223', 'step': 14341, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:30:38.675497', 'step': 14341, 'epoch': 2} {'type': 'loss', 'content': 0.18998479843139648, 'timestamp': '2025-10-01 04:30:38.678055', 'step': 14342, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:38.726642', 'step': 14342, 'epoch': 2} {'type': 'loss', 'content': 0.05639189854264259, 'timestamp': '2025-10-01 04:30:38.729070', 'step': 14343, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:30:38.761568', 'step': 14343, 'epoch': 2} {'type': 'loss', 'content': 0.11013975739479065, 'timestamp': '2025-10-01 04:30:38.785513', 'step': 14344, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:38.818026', 'step': 14344, 'epoch': 2} {'type': 'loss', 'content': 0.08786574751138687, 'timestamp': '2025-10-01 04:30:38.820495', 'step': 14345, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:38.854076', 'step': 14345, 'epoch': 2} {'type': 'loss', 'content': 0.11705385893583298, 'timestamp': '2025-10-01 04:30:38.856266', 'step': 14346, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:38.897741', 'step': 14346, 'epoch': 2} {'type': 'loss', 'content': 0.0704854279756546, 'timestamp': '2025-10-01 04:30:38.899935', 'step': 14347, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:30:38.936393', 'step': 14347, 'epoch': 2} {'type': 'loss', 'content': 0.05852613225579262, 'timestamp': '2025-10-01 04:30:38.961668', 'step': 14348, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:30:39.014261', 'step': 14348, 'epoch': 2} {'type': 'loss', 'content': 0.04657655581831932, 'timestamp': '2025-10-01 04:30:39.016364', 'step': 14349, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:30:39.060808', 'step': 14349, 'epoch': 2} {'type': 'loss', 'content': 0.08049676567316055, 'timestamp': '2025-10-01 04:30:39.063084', 'step': 14350, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:39.110209', 'step': 14350, 'epoch': 2} {'type': 'loss', 'content': 0.07639417052268982, 'timestamp': '2025-10-01 04:30:39.112787', 'step': 14351, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:39.153701', 'step': 14351, 'epoch': 2} {'type': 'loss', 'content': 0.1088506355881691, 'timestamp': '2025-10-01 04:30:39.177324', 'step': 14352, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:30:39.212683', 'step': 14352, 'epoch': 2} {'type': 'loss', 'content': 0.16239434480667114, 'timestamp': '2025-10-01 04:30:39.214838', 'step': 14353, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:39.257009', 'step': 14353, 'epoch': 2} {'type': 'loss', 'content': 0.08007290214300156, 'timestamp': '2025-10-01 04:30:39.259135', 'step': 14354, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:39.298201', 'step': 14354, 'epoch': 2} {'type': 'loss', 'content': 0.10328686237335205, 'timestamp': '2025-10-01 04:30:39.300326', 'step': 14355, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:30:39.352912', 'step': 14355, 'epoch': 2} {'type': 'loss', 'content': 0.09260042011737823, 'timestamp': '2025-10-01 04:30:39.376705', 'step': 14356, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:39.421749', 'step': 14356, 'epoch': 2} {'type': 'loss', 'content': 0.10639095306396484, 'timestamp': '2025-10-01 04:30:39.423942', 'step': 14357, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:39.465122', 'step': 14357, 'epoch': 2} {'type': 'loss', 'content': 0.09131753444671631, 'timestamp': '2025-10-01 04:30:39.467247', 'step': 14358, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:30:39.506519', 'step': 14358, 'epoch': 2} {'type': 'loss', 'content': 0.11748489737510681, 'timestamp': '2025-10-01 04:30:39.508693', 'step': 14359, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:30:39.540851', 'step': 14359, 'epoch': 2} {'type': 'loss', 'content': 0.07734854519367218, 'timestamp': '2025-10-01 04:30:39.564672', 'step': 14360, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:30:39.624033', 'step': 14360, 'epoch': 2} {'type': 'loss', 'content': 0.08378738909959793, 'timestamp': '2025-10-01 04:30:39.627127', 'step': 14361, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:30:39.659370', 'step': 14361, 'epoch': 2} {'type': 'loss', 'content': 0.1852666288614273, 'timestamp': '2025-10-01 04:30:39.661633', 'step': 14362, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:30:39.696864', 'step': 14362, 'epoch': 2} {'type': 'loss', 'content': 0.13781164586544037, 'timestamp': '2025-10-01 04:30:39.701473', 'step': 14363, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:30:39.736089', 'step': 14363, 'epoch': 2} {'type': 'loss', 'content': 0.06219424679875374, 'timestamp': '2025-10-01 04:30:39.759764', 'step': 14364, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:39.792866', 'step': 14364, 'epoch': 2} {'type': 'loss', 'content': 0.14205341041088104, 'timestamp': '2025-10-01 04:30:39.794998', 'step': 14365, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:30:39.830382', 'step': 14365, 'epoch': 2} {'type': 'loss', 'content': 0.14844973385334015, 'timestamp': '2025-10-01 04:30:39.832934', 'step': 14366, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:30:39.865625', 'step': 14366, 'epoch': 2} {'type': 'loss', 'content': 0.08453486859798431, 'timestamp': '2025-10-01 04:30:39.868612', 'step': 14367, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:39.902363', 'step': 14367, 'epoch': 2} {'type': 'loss', 'content': 0.06409032642841339, 'timestamp': '2025-10-01 04:30:39.925992', 'step': 14368, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:39.967752', 'step': 14368, 'epoch': 2} {'type': 'loss', 'content': 0.0905190035700798, 'timestamp': '2025-10-01 04:30:39.969843', 'step': 14369, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:30:40.007986', 'step': 14369, 'epoch': 2} {'type': 'loss', 'content': 0.14259208738803864, 'timestamp': '2025-10-01 04:30:40.010132', 'step': 14370, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:40.040898', 'step': 14370, 'epoch': 2} {'type': 'loss', 'content': 0.0898161381483078, 'timestamp': '2025-10-01 04:30:40.043288', 'step': 14371, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:40.088296', 'step': 14371, 'epoch': 2} {'type': 'loss', 'content': 0.09607107937335968, 'timestamp': '2025-10-01 04:30:40.114116', 'step': 14372, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:40.150505', 'step': 14372, 'epoch': 2} {'type': 'loss', 'content': 0.05767931416630745, 'timestamp': '2025-10-01 04:30:40.152478', 'step': 14373, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:30:40.183459', 'step': 14373, 'epoch': 2} {'type': 'loss', 'content': 0.08028484880924225, 'timestamp': '2025-10-01 04:30:40.185534', 'step': 14374, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:30:40.223398', 'step': 14374, 'epoch': 2} {'type': 'loss', 'content': 0.14163470268249512, 'timestamp': '2025-10-01 04:30:40.225688', 'step': 14375, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:40.257166', 'step': 14375, 'epoch': 2} {'type': 'loss', 'content': 0.07790515571832657, 'timestamp': '2025-10-01 04:30:40.280696', 'step': 14376, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:30:40.319114', 'step': 14376, 'epoch': 2} {'type': 'loss', 'content': 0.06618456542491913, 'timestamp': '2025-10-01 04:30:40.321176', 'step': 14377, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:40.358370', 'step': 14377, 'epoch': 2} {'type': 'loss', 'content': 0.030020821839571, 'timestamp': '2025-10-01 04:30:40.360484', 'step': 14378, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:30:40.393716', 'step': 14378, 'epoch': 2} {'type': 'loss', 'content': 0.1063486859202385, 'timestamp': '2025-10-01 04:30:40.396208', 'step': 14379, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:40.436898', 'step': 14379, 'epoch': 2} {'type': 'loss', 'content': 0.04635554552078247, 'timestamp': '2025-10-01 04:30:40.460834', 'step': 14380, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:40.495562', 'step': 14380, 'epoch': 2} {'type': 'loss', 'content': 0.05172521993517876, 'timestamp': '2025-10-01 04:30:40.497600', 'step': 14381, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:40.538111', 'step': 14381, 'epoch': 2} {'type': 'loss', 'content': 0.10329980403184891, 'timestamp': '2025-10-01 04:30:40.540461', 'step': 14382, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:40.586415', 'step': 14382, 'epoch': 2} {'type': 'loss', 'content': 0.10061952471733093, 'timestamp': '2025-10-01 04:30:40.589391', 'step': 14383, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:30:40.630396', 'step': 14383, 'epoch': 2} {'type': 'loss', 'content': 0.08493954688310623, 'timestamp': '2025-10-01 04:30:40.658758', 'step': 14384, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:30:40.693057', 'step': 14384, 'epoch': 2} {'type': 'loss', 'content': 0.08650342375040054, 'timestamp': '2025-10-01 04:30:40.695622', 'step': 14385, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:30:40.737414', 'step': 14385, 'epoch': 2} {'type': 'loss', 'content': 0.09576007723808289, 'timestamp': '2025-10-01 04:30:40.741756', 'step': 14386, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:40.782459', 'step': 14386, 'epoch': 2} {'type': 'loss', 'content': 0.09399036318063736, 'timestamp': '2025-10-01 04:30:40.784690', 'step': 14387, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:30:40.833589', 'step': 14387, 'epoch': 2} {'type': 'loss', 'content': 0.11948404461145401, 'timestamp': '2025-10-01 04:30:40.857556', 'step': 14388, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:30:40.889118', 'step': 14388, 'epoch': 2} {'type': 'loss', 'content': 0.1379084289073944, 'timestamp': '2025-10-01 04:30:40.891661', 'step': 14389, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:30:40.923337', 'step': 14389, 'epoch': 2} {'type': 'loss', 'content': 0.12653331458568573, 'timestamp': '2025-10-01 04:30:40.925411', 'step': 14390, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:30:40.956889', 'step': 14390, 'epoch': 2} {'type': 'loss', 'content': 0.06871463358402252, 'timestamp': '2025-10-01 04:30:40.965683', 'step': 14391, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:41.012795', 'step': 14391, 'epoch': 2} {'type': 'loss', 'content': 0.04146962985396385, 'timestamp': '2025-10-01 04:30:41.036409', 'step': 14392, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:30:41.071735', 'step': 14392, 'epoch': 2} {'type': 'loss', 'content': 0.048882100731134415, 'timestamp': '2025-10-01 04:30:41.073766', 'step': 14393, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:41.120116', 'step': 14393, 'epoch': 2} {'type': 'loss', 'content': 0.10026686638593674, 'timestamp': '2025-10-01 04:30:41.122378', 'step': 14394, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:30:41.162503', 'step': 14394, 'epoch': 2} {'type': 'loss', 'content': 0.07532710582017899, 'timestamp': '2025-10-01 04:30:41.164722', 'step': 14395, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:41.197182', 'step': 14395, 'epoch': 2} {'type': 'loss', 'content': 0.08923231065273285, 'timestamp': '2025-10-01 04:30:41.220996', 'step': 14396, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:41.253322', 'step': 14396, 'epoch': 2} {'type': 'loss', 'content': 0.07617273926734924, 'timestamp': '2025-10-01 04:30:41.255480', 'step': 14397, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:30:41.286100', 'step': 14397, 'epoch': 2} {'type': 'loss', 'content': 0.06388361006975174, 'timestamp': '2025-10-01 04:30:41.288143', 'step': 14398, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-10-01 04:30:41.324917', 'step': 14398, 'epoch': 2} {'type': 'loss', 'content': 0.06513186544179916, 'timestamp': '2025-10-01 04:30:41.329764', 'step': 14399, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:41.369089', 'step': 14399, 'epoch': 2} {'type': 'loss', 'content': 0.12584497034549713, 'timestamp': '2025-10-01 04:30:41.392804', 'step': 14400, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:41.426837', 'step': 14400, 'epoch': 2} {'type': 'loss', 'content': 0.14694388210773468, 'timestamp': '2025-10-01 04:30:41.429041', 'step': 14401, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:41.463126', 'step': 14401, 'epoch': 2} {'type': 'loss', 'content': 0.13671301305294037, 'timestamp': '2025-10-01 04:30:41.465208', 'step': 14402, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:41.499228', 'step': 14402, 'epoch': 2} {'type': 'loss', 'content': 0.13625121116638184, 'timestamp': '2025-10-01 04:30:41.501393', 'step': 14403, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:30:41.533736', 'step': 14403, 'epoch': 2} {'type': 'loss', 'content': 0.06990668922662735, 'timestamp': '2025-10-01 04:30:41.557529', 'step': 14404, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:41.596654', 'step': 14404, 'epoch': 2} {'type': 'loss', 'content': 0.058781947940588, 'timestamp': '2025-10-01 04:30:41.598870', 'step': 14405, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:41.636691', 'step': 14405, 'epoch': 2} {'type': 'loss', 'content': 0.11669668555259705, 'timestamp': '2025-10-01 04:30:41.639139', 'step': 14406, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:41.683318', 'step': 14406, 'epoch': 2} {'type': 'loss', 'content': 0.09375746548175812, 'timestamp': '2025-10-01 04:30:41.685548', 'step': 14407, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:30:41.726876', 'step': 14407, 'epoch': 2} {'type': 'loss', 'content': 0.020506009459495544, 'timestamp': '2025-10-01 04:30:41.750655', 'step': 14408, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:41.788921', 'step': 14408, 'epoch': 2} {'type': 'loss', 'content': 0.10265544056892395, 'timestamp': '2025-10-01 04:30:41.792223', 'step': 14409, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:30:41.827556', 'step': 14409, 'epoch': 2} {'type': 'loss', 'content': 0.16693679988384247, 'timestamp': '2025-10-01 04:30:41.829883', 'step': 14410, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:30:41.867240', 'step': 14410, 'epoch': 2} {'type': 'loss', 'content': 0.07205469161272049, 'timestamp': '2025-10-01 04:30:41.869787', 'step': 14411, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:41.907350', 'step': 14411, 'epoch': 2} {'type': 'loss', 'content': 0.06460821628570557, 'timestamp': '2025-10-01 04:30:41.931008', 'step': 14412, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:41.975513', 'step': 14412, 'epoch': 2} {'type': 'loss', 'content': 0.13044367730617523, 'timestamp': '2025-10-01 04:30:41.977766', 'step': 14413, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:42.009873', 'step': 14413, 'epoch': 2} {'type': 'loss', 'content': 0.06281934678554535, 'timestamp': '2025-10-01 04:30:42.012940', 'step': 14414, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:30:42.053402', 'step': 14414, 'epoch': 2} {'type': 'loss', 'content': 0.183532252907753, 'timestamp': '2025-10-01 04:30:42.055807', 'step': 14415, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:42.095177', 'step': 14415, 'epoch': 2} {'type': 'loss', 'content': 0.06009449064731598, 'timestamp': '2025-10-01 04:30:42.119060', 'step': 14416, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:42.163529', 'step': 14416, 'epoch': 2} {'type': 'loss', 'content': 0.21198642253875732, 'timestamp': '2025-10-01 04:30:42.165787', 'step': 14417, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:42.198061', 'step': 14417, 'epoch': 2} {'type': 'loss', 'content': 0.08833346515893936, 'timestamp': '2025-10-01 04:30:42.200755', 'step': 14418, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:42.232125', 'step': 14418, 'epoch': 2} {'type': 'loss', 'content': 0.1667286902666092, 'timestamp': '2025-10-01 04:30:42.234389', 'step': 14419, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:30:42.275213', 'step': 14419, 'epoch': 2} {'type': 'loss', 'content': 0.09734801203012466, 'timestamp': '2025-10-01 04:30:42.298781', 'step': 14420, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:42.331788', 'step': 14420, 'epoch': 2} {'type': 'loss', 'content': 0.13934975862503052, 'timestamp': '2025-10-01 04:30:42.334161', 'step': 14421, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:42.367320', 'step': 14421, 'epoch': 2} {'type': 'loss', 'content': 0.13149584829807281, 'timestamp': '2025-10-01 04:30:42.369578', 'step': 14422, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:42.402101', 'step': 14422, 'epoch': 2} {'type': 'loss', 'content': 0.09096350520849228, 'timestamp': '2025-10-01 04:30:42.404431', 'step': 14423, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:42.448029', 'step': 14423, 'epoch': 2} {'type': 'loss', 'content': 0.0473189502954483, 'timestamp': '2025-10-01 04:30:42.471869', 'step': 14424, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:30:42.504507', 'step': 14424, 'epoch': 2} {'type': 'loss', 'content': 0.11256679147481918, 'timestamp': '2025-10-01 04:30:42.506721', 'step': 14425, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:42.544717', 'step': 14425, 'epoch': 2} {'type': 'loss', 'content': 0.07863599807024002, 'timestamp': '2025-10-01 04:30:42.546870', 'step': 14426, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:30:42.578472', 'step': 14426, 'epoch': 2} {'type': 'loss', 'content': 0.06907263398170471, 'timestamp': '2025-10-01 04:30:42.580638', 'step': 14427, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:42.615632', 'step': 14427, 'epoch': 2} {'type': 'loss', 'content': 0.12600475549697876, 'timestamp': '2025-10-01 04:30:42.639281', 'step': 14428, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:30:42.682777', 'step': 14428, 'epoch': 2} {'type': 'loss', 'content': 0.021185362711548805, 'timestamp': '2025-10-01 04:30:42.685504', 'step': 14429, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:42.720285', 'step': 14429, 'epoch': 2} {'type': 'loss', 'content': 0.09722577035427094, 'timestamp': '2025-10-01 04:30:42.722757', 'step': 14430, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:30:42.756951', 'step': 14430, 'epoch': 2} {'type': 'loss', 'content': 0.13798174262046814, 'timestamp': '2025-10-01 04:30:42.759528', 'step': 14431, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:42.792207', 'step': 14431, 'epoch': 2} {'type': 'loss', 'content': 0.10494598746299744, 'timestamp': '2025-10-01 04:30:42.815812', 'step': 14432, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:30:42.850658', 'step': 14432, 'epoch': 2} {'type': 'loss', 'content': 0.20927594602108002, 'timestamp': '2025-10-01 04:30:42.852896', 'step': 14433, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:42.888821', 'step': 14433, 'epoch': 2} {'type': 'loss', 'content': 0.11011335998773575, 'timestamp': '2025-10-01 04:30:42.891087', 'step': 14434, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:42.925315', 'step': 14434, 'epoch': 2} {'type': 'loss', 'content': 0.15664911270141602, 'timestamp': '2025-10-01 04:30:42.929363', 'step': 14435, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:42.968267', 'step': 14435, 'epoch': 2} {'type': 'loss', 'content': 0.08543184399604797, 'timestamp': '2025-10-01 04:30:42.992031', 'step': 14436, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:43.028450', 'step': 14436, 'epoch': 2} {'type': 'loss', 'content': 0.1339966207742691, 'timestamp': '2025-10-01 04:30:43.030810', 'step': 14437, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:43.061270', 'step': 14437, 'epoch': 2} {'type': 'loss', 'content': 0.17347495257854462, 'timestamp': '2025-10-01 04:30:43.063475', 'step': 14438, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:43.094596', 'step': 14438, 'epoch': 2} {'type': 'loss', 'content': 0.06524938344955444, 'timestamp': '2025-10-01 04:30:43.096894', 'step': 14439, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:30:43.128951', 'step': 14439, 'epoch': 2} {'type': 'loss', 'content': 0.11539120227098465, 'timestamp': '2025-10-01 04:30:43.152618', 'step': 14440, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:30:43.183431', 'step': 14440, 'epoch': 2} {'type': 'loss', 'content': 0.10403957217931747, 'timestamp': '2025-10-01 04:30:43.185709', 'step': 14441, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:30:43.216299', 'step': 14441, 'epoch': 2} {'type': 'loss', 'content': 0.04551200568675995, 'timestamp': '2025-10-01 04:30:43.218681', 'step': 14442, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:43.248749', 'step': 14442, 'epoch': 2} {'type': 'loss', 'content': 0.13924288749694824, 'timestamp': '2025-10-01 04:30:43.251729', 'step': 14443, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:30:43.291066', 'step': 14443, 'epoch': 2} {'type': 'loss', 'content': 0.06219283118844032, 'timestamp': '2025-10-01 04:30:43.319645', 'step': 14444, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:30:43.357988', 'step': 14444, 'epoch': 2} {'type': 'loss', 'content': 0.06521612405776978, 'timestamp': '2025-10-01 04:30:43.360578', 'step': 14445, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:30:43.392846', 'step': 14445, 'epoch': 2} {'type': 'loss', 'content': 0.13376396894454956, 'timestamp': '2025-10-01 04:30:43.395280', 'step': 14446, 'epoch': 2} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 949202279808}], 'timestamp': '2025-10-01 04:30:54.032463', 'step': 14446, 'epoch': 2} {'type': 'pplx', 'content': 11585.092317451905, 'timestamp': '2025-10-01 04:30:54.049500', 'step': 14446, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:54.080307', 'step': 14446, 'epoch': 2} {'type': 'loss', 'content': 0.15363028645515442, 'timestamp': '2025-10-01 04:30:54.082691', 'step': 14447, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:30:54.122837', 'step': 14447, 'epoch': 2} {'type': 'loss', 'content': 0.09246081858873367, 'timestamp': '2025-10-01 04:30:54.147070', 'step': 14448, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:30:54.180441', 'step': 14448, 'epoch': 2} {'type': 'loss', 'content': 0.049923766404390335, 'timestamp': '2025-10-01 04:30:54.182917', 'step': 14449, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:54.223017', 'step': 14449, 'epoch': 2} {'type': 'loss', 'content': 0.12727519869804382, 'timestamp': '2025-10-01 04:30:54.225364', 'step': 14450, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:54.262426', 'step': 14450, 'epoch': 2} {'type': 'loss', 'content': 0.13037501275539398, 'timestamp': '2025-10-01 04:30:54.265163', 'step': 14451, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:54.312128', 'step': 14451, 'epoch': 2} {'type': 'loss', 'content': 0.10504567623138428, 'timestamp': '2025-10-01 04:30:54.336377', 'step': 14452, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:54.370550', 'step': 14452, 'epoch': 2} {'type': 'loss', 'content': 0.12548735737800598, 'timestamp': '2025-10-01 04:30:54.373042', 'step': 14453, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:30:54.406340', 'step': 14453, 'epoch': 2} {'type': 'loss', 'content': 0.057196445763111115, 'timestamp': '2025-10-01 04:30:54.408833', 'step': 14454, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:54.455844', 'step': 14454, 'epoch': 2} {'type': 'loss', 'content': 0.19042718410491943, 'timestamp': '2025-10-01 04:30:54.458265', 'step': 14455, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:54.505679', 'step': 14455, 'epoch': 2} {'type': 'loss', 'content': 0.05737543851137161, 'timestamp': '2025-10-01 04:30:54.529562', 'step': 14456, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:30:54.561499', 'step': 14456, 'epoch': 2} {'type': 'loss', 'content': 0.07989814877510071, 'timestamp': '2025-10-01 04:30:54.563941', 'step': 14457, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:30:54.596509', 'step': 14457, 'epoch': 2} {'type': 'loss', 'content': 0.16080111265182495, 'timestamp': '2025-10-01 04:30:54.598766', 'step': 14458, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:30:54.631045', 'step': 14458, 'epoch': 2} {'type': 'loss', 'content': 0.1174468845129013, 'timestamp': '2025-10-01 04:30:54.633464', 'step': 14459, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:54.673161', 'step': 14459, 'epoch': 2} {'type': 'loss', 'content': 0.21765846014022827, 'timestamp': '2025-10-01 04:30:54.697079', 'step': 14460, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:54.744896', 'step': 14460, 'epoch': 2} {'type': 'loss', 'content': 0.018427565693855286, 'timestamp': '2025-10-01 04:30:54.747398', 'step': 14461, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:54.779933', 'step': 14461, 'epoch': 2} {'type': 'loss', 'content': 0.10026995837688446, 'timestamp': '2025-10-01 04:30:54.782564', 'step': 14462, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:54.814568', 'step': 14462, 'epoch': 2} {'type': 'loss', 'content': 0.15249477326869965, 'timestamp': '2025-10-01 04:30:54.816988', 'step': 14463, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:30:54.854097', 'step': 14463, 'epoch': 2} {'type': 'loss', 'content': 0.10992785543203354, 'timestamp': '2025-10-01 04:30:54.877552', 'step': 14464, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:54.910216', 'step': 14464, 'epoch': 2} {'type': 'loss', 'content': 0.08333848416805267, 'timestamp': '2025-10-01 04:30:54.912778', 'step': 14465, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:54.946463', 'step': 14465, 'epoch': 2} {'type': 'loss', 'content': 0.07649880647659302, 'timestamp': '2025-10-01 04:30:54.948949', 'step': 14466, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:54.986975', 'step': 14466, 'epoch': 2} {'type': 'loss', 'content': 0.08896282315254211, 'timestamp': '2025-10-01 04:30:54.989315', 'step': 14467, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:30:55.022724', 'step': 14467, 'epoch': 2} {'type': 'loss', 'content': 0.07225816696882248, 'timestamp': '2025-10-01 04:30:55.046905', 'step': 14468, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:30:55.085131', 'step': 14468, 'epoch': 2} {'type': 'loss', 'content': 0.13491740822792053, 'timestamp': '2025-10-01 04:30:55.087216', 'step': 14469, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:30:55.118982', 'step': 14469, 'epoch': 2} {'type': 'loss', 'content': 0.058964043855667114, 'timestamp': '2025-10-01 04:30:55.121170', 'step': 14470, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:55.153630', 'step': 14470, 'epoch': 2} {'type': 'loss', 'content': 0.10657874494791031, 'timestamp': '2025-10-01 04:30:55.155753', 'step': 14471, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:55.201757', 'step': 14471, 'epoch': 2} {'type': 'loss', 'content': 0.10660640150308609, 'timestamp': '2025-10-01 04:30:55.225383', 'step': 14472, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:30:55.268136', 'step': 14472, 'epoch': 2} {'type': 'loss', 'content': 0.1390228420495987, 'timestamp': '2025-10-01 04:30:55.277437', 'step': 14473, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:55.317879', 'step': 14473, 'epoch': 2} {'type': 'loss', 'content': 0.1619616001844406, 'timestamp': '2025-10-01 04:30:55.320042', 'step': 14474, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:30:55.360840', 'step': 14474, 'epoch': 2} {'type': 'loss', 'content': 0.08636093884706497, 'timestamp': '2025-10-01 04:30:55.365268', 'step': 14475, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:55.400692', 'step': 14475, 'epoch': 2} {'type': 'loss', 'content': 0.10990500450134277, 'timestamp': '2025-10-01 04:30:55.424394', 'step': 14476, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:55.467296', 'step': 14476, 'epoch': 2} {'type': 'loss', 'content': 0.16845495998859406, 'timestamp': '2025-10-01 04:30:55.469865', 'step': 14477, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:55.508874', 'step': 14477, 'epoch': 2} {'type': 'loss', 'content': 0.15590055286884308, 'timestamp': '2025-10-01 04:30:55.512278', 'step': 14478, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:55.551973', 'step': 14478, 'epoch': 2} {'type': 'loss', 'content': 0.15152660012245178, 'timestamp': '2025-10-01 04:30:55.555182', 'step': 14479, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:55.597509', 'step': 14479, 'epoch': 2} {'type': 'loss', 'content': 0.05341951921582222, 'timestamp': '2025-10-01 04:30:55.621125', 'step': 14480, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:30:55.654445', 'step': 14480, 'epoch': 2} {'type': 'loss', 'content': 0.10582903772592545, 'timestamp': '2025-10-01 04:30:55.656681', 'step': 14481, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:30:55.699092', 'step': 14481, 'epoch': 2} {'type': 'loss', 'content': 0.09085004776716232, 'timestamp': '2025-10-01 04:30:55.701370', 'step': 14482, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:30:55.735629', 'step': 14482, 'epoch': 2} {'type': 'loss', 'content': 0.08627073466777802, 'timestamp': '2025-10-01 04:30:55.737902', 'step': 14483, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:55.770406', 'step': 14483, 'epoch': 2} {'type': 'loss', 'content': 0.07204194366931915, 'timestamp': '2025-10-01 04:30:55.794076', 'step': 14484, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:55.832208', 'step': 14484, 'epoch': 2} {'type': 'loss', 'content': 0.12333013862371445, 'timestamp': '2025-10-01 04:30:55.836343', 'step': 14485, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:55.892805', 'step': 14485, 'epoch': 2} {'type': 'loss', 'content': 0.1613708883523941, 'timestamp': '2025-10-01 04:30:55.895039', 'step': 14486, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:55.943234', 'step': 14486, 'epoch': 2} {'type': 'loss', 'content': 0.10898666828870773, 'timestamp': '2025-10-01 04:30:55.949354', 'step': 14487, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:55.996374', 'step': 14487, 'epoch': 2} {'type': 'loss', 'content': 0.07341231405735016, 'timestamp': '2025-10-01 04:30:56.020044', 'step': 14488, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:30:56.058196', 'step': 14488, 'epoch': 2} {'type': 'loss', 'content': 0.07691173255443573, 'timestamp': '2025-10-01 04:30:56.060352', 'step': 14489, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:56.108207', 'step': 14489, 'epoch': 2} {'type': 'loss', 'content': 0.107577383518219, 'timestamp': '2025-10-01 04:30:56.113006', 'step': 14490, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:56.154243', 'step': 14490, 'epoch': 2} {'type': 'loss', 'content': 0.03648878261446953, 'timestamp': '2025-10-01 04:30:56.156391', 'step': 14491, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:30:56.203298', 'step': 14491, 'epoch': 2} {'type': 'loss', 'content': 0.06795649230480194, 'timestamp': '2025-10-01 04:30:56.226958', 'step': 14492, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:56.266499', 'step': 14492, 'epoch': 2} {'type': 'loss', 'content': 0.1250251829624176, 'timestamp': '2025-10-01 04:30:56.268625', 'step': 14493, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:56.307619', 'step': 14493, 'epoch': 2} {'type': 'loss', 'content': 0.11444069445133209, 'timestamp': '2025-10-01 04:30:56.309665', 'step': 14494, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:56.348731', 'step': 14494, 'epoch': 2} {'type': 'loss', 'content': 0.05770081281661987, 'timestamp': '2025-10-01 04:30:56.352616', 'step': 14495, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:30:56.386096', 'step': 14495, 'epoch': 2} {'type': 'loss', 'content': 0.08047840744256973, 'timestamp': '2025-10-01 04:30:56.409912', 'step': 14496, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:30:56.460227', 'step': 14496, 'epoch': 2} {'type': 'loss', 'content': 0.11553259938955307, 'timestamp': '2025-10-01 04:30:56.464867', 'step': 14497, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:56.504102', 'step': 14497, 'epoch': 2} {'type': 'loss', 'content': 0.06573738157749176, 'timestamp': '2025-10-01 04:30:56.506944', 'step': 14498, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:30:56.554682', 'step': 14498, 'epoch': 2} {'type': 'loss', 'content': 0.12181907147169113, 'timestamp': '2025-10-01 04:30:56.557225', 'step': 14499, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:30:56.596220', 'step': 14499, 'epoch': 2} {'type': 'loss', 'content': 0.15637890994548798, 'timestamp': '2025-10-01 04:30:56.620099', 'step': 14500, 'epoch': 2} {'type': 'info', 'content': 'Checkpoint saved at step 14500', 'timestamp': '2025-10-01 04:31:01.992202', 'step': 14500, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:02.045369', 'step': 14500, 'epoch': 2} {'type': 'loss', 'content': 0.06747005134820938, 'timestamp': '2025-10-01 04:31:02.047589', 'step': 14501, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:02.085465', 'step': 14501, 'epoch': 2} {'type': 'loss', 'content': 0.14159609377384186, 'timestamp': '2025-10-01 04:31:02.087938', 'step': 14502, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:02.121937', 'step': 14502, 'epoch': 2} {'type': 'loss', 'content': 0.0715886577963829, 'timestamp': '2025-10-01 04:31:02.124239', 'step': 14503, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:02.159987', 'step': 14503, 'epoch': 2} {'type': 'loss', 'content': 0.0870688259601593, 'timestamp': '2025-10-01 04:31:02.183702', 'step': 14504, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:02.231721', 'step': 14504, 'epoch': 2} {'type': 'loss', 'content': 0.10201654583215714, 'timestamp': '2025-10-01 04:31:02.233862', 'step': 14505, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:02.277287', 'step': 14505, 'epoch': 2} {'type': 'loss', 'content': 0.05425947532057762, 'timestamp': '2025-10-01 04:31:02.279437', 'step': 14506, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:02.321928', 'step': 14506, 'epoch': 2} {'type': 'loss', 'content': 0.12391316145658493, 'timestamp': '2025-10-01 04:31:02.324289', 'step': 14507, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:02.362414', 'step': 14507, 'epoch': 2} {'type': 'loss', 'content': 0.12591303884983063, 'timestamp': '2025-10-01 04:31:02.386009', 'step': 14508, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:02.418807', 'step': 14508, 'epoch': 2} {'type': 'loss', 'content': 0.07829481363296509, 'timestamp': '2025-10-01 04:31:02.421073', 'step': 14509, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:02.453572', 'step': 14509, 'epoch': 2} {'type': 'loss', 'content': 0.08346477895975113, 'timestamp': '2025-10-01 04:31:02.455909', 'step': 14510, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:02.497781', 'step': 14510, 'epoch': 2} {'type': 'loss', 'content': 0.1152140200138092, 'timestamp': '2025-10-01 04:31:02.500200', 'step': 14511, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:31:02.535505', 'step': 14511, 'epoch': 2} {'type': 'loss', 'content': 0.18026232719421387, 'timestamp': '2025-10-01 04:31:02.559124', 'step': 14512, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:02.589563', 'step': 14512, 'epoch': 2} {'type': 'loss', 'content': 0.1261972337961197, 'timestamp': '2025-10-01 04:31:02.591683', 'step': 14513, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:31:02.624782', 'step': 14513, 'epoch': 2} {'type': 'loss', 'content': 0.11466507613658905, 'timestamp': '2025-10-01 04:31:02.627127', 'step': 14514, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:02.660554', 'step': 14514, 'epoch': 2} {'type': 'loss', 'content': 0.12146373093128204, 'timestamp': '2025-10-01 04:31:02.662626', 'step': 14515, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:02.694120', 'step': 14515, 'epoch': 2} {'type': 'loss', 'content': 0.08848181366920471, 'timestamp': '2025-10-01 04:31:02.717682', 'step': 14516, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:02.757685', 'step': 14516, 'epoch': 2} {'type': 'loss', 'content': 0.1652621179819107, 'timestamp': '2025-10-01 04:31:02.760445', 'step': 14517, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:31:02.792275', 'step': 14517, 'epoch': 2} {'type': 'loss', 'content': 0.14286424219608307, 'timestamp': '2025-10-01 04:31:02.794720', 'step': 14518, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:31:02.829141', 'step': 14518, 'epoch': 2} {'type': 'loss', 'content': 0.2112865149974823, 'timestamp': '2025-10-01 04:31:02.832014', 'step': 14519, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:31:02.864756', 'step': 14519, 'epoch': 2} {'type': 'loss', 'content': 0.24215352535247803, 'timestamp': '2025-10-01 04:31:02.888497', 'step': 14520, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:31:02.925085', 'step': 14520, 'epoch': 2} {'type': 'loss', 'content': 0.10165593773126602, 'timestamp': '2025-10-01 04:31:02.927231', 'step': 14521, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:02.958005', 'step': 14521, 'epoch': 2} {'type': 'loss', 'content': 0.17291666567325592, 'timestamp': '2025-10-01 04:31:02.960907', 'step': 14522, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:02.997854', 'step': 14522, 'epoch': 2} {'type': 'loss', 'content': 0.13973462581634521, 'timestamp': '2025-10-01 04:31:03.000173', 'step': 14523, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:31:03.033988', 'step': 14523, 'epoch': 2} {'type': 'loss', 'content': 0.12161040306091309, 'timestamp': '2025-10-01 04:31:03.057754', 'step': 14524, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:03.089245', 'step': 14524, 'epoch': 2} {'type': 'loss', 'content': 0.0940152183175087, 'timestamp': '2025-10-01 04:31:03.094286', 'step': 14525, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:03.152783', 'step': 14525, 'epoch': 2} {'type': 'loss', 'content': 0.16629141569137573, 'timestamp': '2025-10-01 04:31:03.154944', 'step': 14526, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:31:03.201233', 'step': 14526, 'epoch': 2} {'type': 'loss', 'content': 0.15969033539295197, 'timestamp': '2025-10-01 04:31:03.205628', 'step': 14527, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:03.253363', 'step': 14527, 'epoch': 2} {'type': 'loss', 'content': 0.028962038457393646, 'timestamp': '2025-10-01 04:31:03.277073', 'step': 14528, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:03.314709', 'step': 14528, 'epoch': 2} {'type': 'loss', 'content': 0.09023592621088028, 'timestamp': '2025-10-01 04:31:03.326659', 'step': 14529, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:03.359518', 'step': 14529, 'epoch': 2} {'type': 'loss', 'content': 0.08842409402132034, 'timestamp': '2025-10-01 04:31:03.362144', 'step': 14530, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:03.394845', 'step': 14530, 'epoch': 2} {'type': 'loss', 'content': 0.10742350667715073, 'timestamp': '2025-10-01 04:31:03.397043', 'step': 14531, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:03.442911', 'step': 14531, 'epoch': 2} {'type': 'loss', 'content': 0.12416858226060867, 'timestamp': '2025-10-01 04:31:03.466511', 'step': 14532, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:03.506954', 'step': 14532, 'epoch': 2} {'type': 'loss', 'content': 0.0796005055308342, 'timestamp': '2025-10-01 04:31:03.509060', 'step': 14533, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:03.547737', 'step': 14533, 'epoch': 2} {'type': 'loss', 'content': 0.10002702474594116, 'timestamp': '2025-10-01 04:31:03.550022', 'step': 14534, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:03.595379', 'step': 14534, 'epoch': 2} {'type': 'loss', 'content': 0.05747082829475403, 'timestamp': '2025-10-01 04:31:03.597480', 'step': 14535, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:03.637011', 'step': 14535, 'epoch': 2} {'type': 'loss', 'content': 0.11766908317804337, 'timestamp': '2025-10-01 04:31:03.660649', 'step': 14536, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:03.692596', 'step': 14536, 'epoch': 2} {'type': 'loss', 'content': 0.06426717340946198, 'timestamp': '2025-10-01 04:31:03.694748', 'step': 14537, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:31:03.726749', 'step': 14537, 'epoch': 2} {'type': 'loss', 'content': 0.13273002207279205, 'timestamp': '2025-10-01 04:31:03.729021', 'step': 14538, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:03.760987', 'step': 14538, 'epoch': 2} {'type': 'loss', 'content': 0.09124710410833359, 'timestamp': '2025-10-01 04:31:03.763391', 'step': 14539, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:03.801618', 'step': 14539, 'epoch': 2} {'type': 'loss', 'content': 0.07971878349781036, 'timestamp': '2025-10-01 04:31:03.825325', 'step': 14540, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:03.864905', 'step': 14540, 'epoch': 2} {'type': 'loss', 'content': 0.13627788424491882, 'timestamp': '2025-10-01 04:31:03.867128', 'step': 14541, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:03.906526', 'step': 14541, 'epoch': 2} {'type': 'loss', 'content': 0.08428274095058441, 'timestamp': '2025-10-01 04:31:03.908736', 'step': 14542, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:03.948977', 'step': 14542, 'epoch': 2} {'type': 'loss', 'content': 0.09276281297206879, 'timestamp': '2025-10-01 04:31:03.951128', 'step': 14543, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:03.984191', 'step': 14543, 'epoch': 2} {'type': 'loss', 'content': 0.15007638931274414, 'timestamp': '2025-10-01 04:31:04.007850', 'step': 14544, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:04.044364', 'step': 14544, 'epoch': 2} {'type': 'loss', 'content': 0.14842328429222107, 'timestamp': '2025-10-01 04:31:04.046624', 'step': 14545, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:04.091835', 'step': 14545, 'epoch': 2} {'type': 'loss', 'content': 0.14461548626422882, 'timestamp': '2025-10-01 04:31:04.094038', 'step': 14546, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:04.126622', 'step': 14546, 'epoch': 2} {'type': 'loss', 'content': 0.12692110240459442, 'timestamp': '2025-10-01 04:31:04.128748', 'step': 14547, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:04.164196', 'step': 14547, 'epoch': 2} {'type': 'loss', 'content': 0.11030196398496628, 'timestamp': '2025-10-01 04:31:04.188076', 'step': 14548, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:04.224468', 'step': 14548, 'epoch': 2} {'type': 'loss', 'content': 0.09788517653942108, 'timestamp': '2025-10-01 04:31:04.227040', 'step': 14549, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:04.262296', 'step': 14549, 'epoch': 2} {'type': 'loss', 'content': 0.021219702437520027, 'timestamp': '2025-10-01 04:31:04.264336', 'step': 14550, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:04.298511', 'step': 14550, 'epoch': 2} {'type': 'loss', 'content': 0.0805787593126297, 'timestamp': '2025-10-01 04:31:04.300663', 'step': 14551, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:04.332201', 'step': 14551, 'epoch': 2} {'type': 'loss', 'content': 0.04358765855431557, 'timestamp': '2025-10-01 04:31:04.355892', 'step': 14552, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:31:04.389844', 'step': 14552, 'epoch': 2} {'type': 'loss', 'content': 0.06924839317798615, 'timestamp': '2025-10-01 04:31:04.392169', 'step': 14553, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:04.427125', 'step': 14553, 'epoch': 2} {'type': 'loss', 'content': 0.03829478472471237, 'timestamp': '2025-10-01 04:31:04.429477', 'step': 14554, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:04.471625', 'step': 14554, 'epoch': 2} {'type': 'loss', 'content': 0.17132093012332916, 'timestamp': '2025-10-01 04:31:04.473882', 'step': 14555, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:31:04.515418', 'step': 14555, 'epoch': 2} {'type': 'loss', 'content': 0.1358521431684494, 'timestamp': '2025-10-01 04:31:04.539051', 'step': 14556, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:04.578544', 'step': 14556, 'epoch': 2} {'type': 'loss', 'content': 0.11274684220552444, 'timestamp': '2025-10-01 04:31:04.580653', 'step': 14557, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:04.612024', 'step': 14557, 'epoch': 2} {'type': 'loss', 'content': 0.12450748682022095, 'timestamp': '2025-10-01 04:31:04.614940', 'step': 14558, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:04.666641', 'step': 14558, 'epoch': 2} {'type': 'loss', 'content': 0.06500151008367538, 'timestamp': '2025-10-01 04:31:04.668741', 'step': 14559, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:04.704740', 'step': 14559, 'epoch': 2} {'type': 'loss', 'content': 0.0565488338470459, 'timestamp': '2025-10-01 04:31:04.728817', 'step': 14560, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:04.760696', 'step': 14560, 'epoch': 2} {'type': 'loss', 'content': 0.1433977633714676, 'timestamp': '2025-10-01 04:31:04.762798', 'step': 14561, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:04.806795', 'step': 14561, 'epoch': 2} {'type': 'loss', 'content': 0.17525333166122437, 'timestamp': '2025-10-01 04:31:04.809001', 'step': 14562, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:04.848800', 'step': 14562, 'epoch': 2} {'type': 'loss', 'content': 0.08061747252941132, 'timestamp': '2025-10-01 04:31:04.851064', 'step': 14563, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:31:04.892404', 'step': 14563, 'epoch': 2} {'type': 'loss', 'content': 0.17836707830429077, 'timestamp': '2025-10-01 04:31:04.917325', 'step': 14564, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:04.950465', 'step': 14564, 'epoch': 2} {'type': 'loss', 'content': 0.07603982836008072, 'timestamp': '2025-10-01 04:31:04.952638', 'step': 14565, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:04.984942', 'step': 14565, 'epoch': 2} {'type': 'loss', 'content': 0.11034374684095383, 'timestamp': '2025-10-01 04:31:04.987135', 'step': 14566, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:05.018187', 'step': 14566, 'epoch': 2} {'type': 'loss', 'content': 0.03815653175115585, 'timestamp': '2025-10-01 04:31:05.020758', 'step': 14567, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:31:05.053670', 'step': 14567, 'epoch': 2} {'type': 'loss', 'content': 0.07313201576471329, 'timestamp': '2025-10-01 04:31:05.079040', 'step': 14568, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:05.114058', 'step': 14568, 'epoch': 2} {'type': 'loss', 'content': 0.030463164672255516, 'timestamp': '2025-10-01 04:31:05.116169', 'step': 14569, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:31:05.149887', 'step': 14569, 'epoch': 2} {'type': 'loss', 'content': 0.08695030957460403, 'timestamp': '2025-10-01 04:31:05.152266', 'step': 14570, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:31:05.182660', 'step': 14570, 'epoch': 2} {'type': 'loss', 'content': 0.08756335079669952, 'timestamp': '2025-10-01 04:31:05.185226', 'step': 14571, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:05.221797', 'step': 14571, 'epoch': 2} {'type': 'loss', 'content': 0.1527501493692398, 'timestamp': '2025-10-01 04:31:05.245592', 'step': 14572, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:05.282809', 'step': 14572, 'epoch': 2} {'type': 'loss', 'content': 0.10781878232955933, 'timestamp': '2025-10-01 04:31:05.285041', 'step': 14573, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:05.315690', 'step': 14573, 'epoch': 2} {'type': 'loss', 'content': 0.04317568242549896, 'timestamp': '2025-10-01 04:31:05.320425', 'step': 14574, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:05.358034', 'step': 14574, 'epoch': 2} {'type': 'loss', 'content': 0.10758503526449203, 'timestamp': '2025-10-01 04:31:05.362759', 'step': 14575, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:05.398288', 'step': 14575, 'epoch': 2} {'type': 'loss', 'content': 0.04709595814347267, 'timestamp': '2025-10-01 04:31:05.422076', 'step': 14576, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:05.456873', 'step': 14576, 'epoch': 2} {'type': 'loss', 'content': 0.13931983709335327, 'timestamp': '2025-10-01 04:31:05.459190', 'step': 14577, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:05.496819', 'step': 14577, 'epoch': 2} {'type': 'loss', 'content': 0.21136128902435303, 'timestamp': '2025-10-01 04:31:05.499419', 'step': 14578, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:05.534935', 'step': 14578, 'epoch': 2} {'type': 'loss', 'content': 0.03953851759433746, 'timestamp': '2025-10-01 04:31:05.537096', 'step': 14579, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:05.572197', 'step': 14579, 'epoch': 2} {'type': 'loss', 'content': 0.09398596733808517, 'timestamp': '2025-10-01 04:31:05.595921', 'step': 14580, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:05.630254', 'step': 14580, 'epoch': 2} {'type': 'loss', 'content': 0.12294367700815201, 'timestamp': '2025-10-01 04:31:05.632406', 'step': 14581, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:05.667927', 'step': 14581, 'epoch': 2} {'type': 'loss', 'content': 0.045335590839385986, 'timestamp': '2025-10-01 04:31:05.670578', 'step': 14582, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:05.704099', 'step': 14582, 'epoch': 2} {'type': 'loss', 'content': 0.08294209092855453, 'timestamp': '2025-10-01 04:31:05.706380', 'step': 14583, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:05.739537', 'step': 14583, 'epoch': 2} {'type': 'loss', 'content': 0.0687669962644577, 'timestamp': '2025-10-01 04:31:05.763251', 'step': 14584, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:05.793842', 'step': 14584, 'epoch': 2} {'type': 'loss', 'content': 0.06594342738389969, 'timestamp': '2025-10-01 04:31:05.796089', 'step': 14585, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:05.830031', 'step': 14585, 'epoch': 2} {'type': 'loss', 'content': 0.06848811358213425, 'timestamp': '2025-10-01 04:31:05.832241', 'step': 14586, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:05.865703', 'step': 14586, 'epoch': 2} {'type': 'loss', 'content': 0.05260707810521126, 'timestamp': '2025-10-01 04:31:05.868427', 'step': 14587, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:05.901962', 'step': 14587, 'epoch': 2} {'type': 'loss', 'content': 0.1737184375524521, 'timestamp': '2025-10-01 04:31:05.925963', 'step': 14588, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:31:05.958775', 'step': 14588, 'epoch': 2} {'type': 'loss', 'content': 0.08573546260595322, 'timestamp': '2025-10-01 04:31:05.960723', 'step': 14589, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:05.993180', 'step': 14589, 'epoch': 2} {'type': 'loss', 'content': 0.16061773896217346, 'timestamp': '2025-10-01 04:31:05.995798', 'step': 14590, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:06.027325', 'step': 14590, 'epoch': 2} {'type': 'loss', 'content': 0.06907624751329422, 'timestamp': '2025-10-01 04:31:06.030071', 'step': 14591, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:06.060473', 'step': 14591, 'epoch': 2} {'type': 'loss', 'content': 0.06725210696458817, 'timestamp': '2025-10-01 04:31:06.084144', 'step': 14592, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:06.118119', 'step': 14592, 'epoch': 2} {'type': 'loss', 'content': 0.1041790097951889, 'timestamp': '2025-10-01 04:31:06.120205', 'step': 14593, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:06.155341', 'step': 14593, 'epoch': 2} {'type': 'loss', 'content': 0.06659786403179169, 'timestamp': '2025-10-01 04:31:06.157463', 'step': 14594, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:06.190019', 'step': 14594, 'epoch': 2} {'type': 'loss', 'content': 0.09368922561407089, 'timestamp': '2025-10-01 04:31:06.192368', 'step': 14595, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:06.223216', 'step': 14595, 'epoch': 2} {'type': 'loss', 'content': 0.10572780668735504, 'timestamp': '2025-10-01 04:31:06.247103', 'step': 14596, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:06.280644', 'step': 14596, 'epoch': 2} {'type': 'loss', 'content': 0.040117017924785614, 'timestamp': '2025-10-01 04:31:06.283057', 'step': 14597, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:31:06.319571', 'step': 14597, 'epoch': 2} {'type': 'loss', 'content': 0.06143103912472725, 'timestamp': '2025-10-01 04:31:06.323758', 'step': 14598, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:31:06.357481', 'step': 14598, 'epoch': 2} {'type': 'loss', 'content': 0.04197264835238457, 'timestamp': '2025-10-01 04:31:06.360138', 'step': 14599, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:06.393882', 'step': 14599, 'epoch': 2} {'type': 'loss', 'content': 0.0542302206158638, 'timestamp': '2025-10-01 04:31:06.417587', 'step': 14600, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:06.469957', 'step': 14600, 'epoch': 2} {'type': 'loss', 'content': 0.06932847946882248, 'timestamp': '2025-10-01 04:31:06.472184', 'step': 14601, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:06.534502', 'step': 14601, 'epoch': 2} {'type': 'loss', 'content': 0.06428999453783035, 'timestamp': '2025-10-01 04:31:06.536615', 'step': 14602, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:06.582389', 'step': 14602, 'epoch': 2} {'type': 'loss', 'content': 0.09483839571475983, 'timestamp': '2025-10-01 04:31:06.589401', 'step': 14603, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:06.664580', 'step': 14603, 'epoch': 2} {'type': 'loss', 'content': 0.13739067316055298, 'timestamp': '2025-10-01 04:31:06.688541', 'step': 14604, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:06.724963', 'step': 14604, 'epoch': 2} {'type': 'loss', 'content': 0.04290015250444412, 'timestamp': '2025-10-01 04:31:06.730656', 'step': 14605, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:31:06.771449', 'step': 14605, 'epoch': 2} {'type': 'loss', 'content': 0.13532328605651855, 'timestamp': '2025-10-01 04:31:06.778756', 'step': 14606, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:31:06.832043', 'step': 14606, 'epoch': 2} {'type': 'loss', 'content': 0.0510544590651989, 'timestamp': '2025-10-01 04:31:06.834365', 'step': 14607, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:06.874178', 'step': 14607, 'epoch': 2} {'type': 'loss', 'content': 0.06648442149162292, 'timestamp': '2025-10-01 04:31:06.898132', 'step': 14608, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:31:06.931855', 'step': 14608, 'epoch': 2} {'type': 'loss', 'content': 0.13631029427051544, 'timestamp': '2025-10-01 04:31:06.933808', 'step': 14609, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:06.974744', 'step': 14609, 'epoch': 2} {'type': 'loss', 'content': 0.09661643207073212, 'timestamp': '2025-10-01 04:31:06.976965', 'step': 14610, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:07.010306', 'step': 14610, 'epoch': 2} {'type': 'loss', 'content': 0.057960763573646545, 'timestamp': '2025-10-01 04:31:07.012678', 'step': 14611, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:07.055708', 'step': 14611, 'epoch': 2} {'type': 'loss', 'content': 0.1622464805841446, 'timestamp': '2025-10-01 04:31:07.080012', 'step': 14612, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:07.125035', 'step': 14612, 'epoch': 2} {'type': 'loss', 'content': 0.07265406101942062, 'timestamp': '2025-10-01 04:31:07.129164', 'step': 14613, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:31:07.169915', 'step': 14613, 'epoch': 2} {'type': 'loss', 'content': 0.10222561657428741, 'timestamp': '2025-10-01 04:31:07.174418', 'step': 14614, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:31:07.207561', 'step': 14614, 'epoch': 2} {'type': 'loss', 'content': 0.06334882229566574, 'timestamp': '2025-10-01 04:31:07.210209', 'step': 14615, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:07.260962', 'step': 14615, 'epoch': 2} {'type': 'loss', 'content': 0.08529917150735855, 'timestamp': '2025-10-01 04:31:07.284700', 'step': 14616, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:07.336014', 'step': 14616, 'epoch': 2} {'type': 'loss', 'content': 0.08521905541419983, 'timestamp': '2025-10-01 04:31:07.340500', 'step': 14617, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-10-01 04:31:07.377689', 'step': 14617, 'epoch': 2} {'type': 'loss', 'content': 0.11132954061031342, 'timestamp': '2025-10-01 04:31:07.382432', 'step': 14618, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:31:07.429979', 'step': 14618, 'epoch': 2} {'type': 'loss', 'content': 0.1841142475605011, 'timestamp': '2025-10-01 04:31:07.432113', 'step': 14619, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:07.463829', 'step': 14619, 'epoch': 2} {'type': 'loss', 'content': 0.044648896902799606, 'timestamp': '2025-10-01 04:31:07.487368', 'step': 14620, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:07.519509', 'step': 14620, 'epoch': 2} {'type': 'loss', 'content': 0.22345703840255737, 'timestamp': '2025-10-01 04:31:07.521865', 'step': 14621, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:07.553995', 'step': 14621, 'epoch': 2} {'type': 'loss', 'content': 0.12200279533863068, 'timestamp': '2025-10-01 04:31:07.555998', 'step': 14622, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:07.587221', 'step': 14622, 'epoch': 2} {'type': 'loss', 'content': 0.10346390306949615, 'timestamp': '2025-10-01 04:31:07.589431', 'step': 14623, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:07.620556', 'step': 14623, 'epoch': 2} {'type': 'loss', 'content': 0.1130513846874237, 'timestamp': '2025-10-01 04:31:07.644082', 'step': 14624, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:07.675403', 'step': 14624, 'epoch': 2} {'type': 'loss', 'content': 0.10163645446300507, 'timestamp': '2025-10-01 04:31:07.677733', 'step': 14625, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:07.708515', 'step': 14625, 'epoch': 2} {'type': 'loss', 'content': 0.07719369232654572, 'timestamp': '2025-10-01 04:31:07.711585', 'step': 14626, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:07.743683', 'step': 14626, 'epoch': 2} {'type': 'loss', 'content': 0.16914361715316772, 'timestamp': '2025-10-01 04:31:07.745712', 'step': 14627, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:31:07.777259', 'step': 14627, 'epoch': 2} {'type': 'loss', 'content': 0.08285324275493622, 'timestamp': '2025-10-01 04:31:07.801027', 'step': 14628, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:31:07.831118', 'step': 14628, 'epoch': 2} {'type': 'loss', 'content': 0.10033045709133148, 'timestamp': '2025-10-01 04:31:07.833156', 'step': 14629, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:31:07.863616', 'step': 14629, 'epoch': 2} {'type': 'loss', 'content': 0.06271464377641678, 'timestamp': '2025-10-01 04:31:07.865868', 'step': 14630, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:07.896853', 'step': 14630, 'epoch': 2} {'type': 'loss', 'content': 0.10166275501251221, 'timestamp': '2025-10-01 04:31:07.899072', 'step': 14631, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:07.928810', 'step': 14631, 'epoch': 2} {'type': 'loss', 'content': 0.1216547042131424, 'timestamp': '2025-10-01 04:31:07.952575', 'step': 14632, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:07.983025', 'step': 14632, 'epoch': 2} {'type': 'loss', 'content': 0.14257147908210754, 'timestamp': '2025-10-01 04:31:07.985654', 'step': 14633, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:08.015854', 'step': 14633, 'epoch': 2} {'type': 'loss', 'content': 0.06764290481805801, 'timestamp': '2025-10-01 04:31:08.017957', 'step': 14634, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:08.048564', 'step': 14634, 'epoch': 2} {'type': 'loss', 'content': 0.0899873897433281, 'timestamp': '2025-10-01 04:31:08.050943', 'step': 14635, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:08.081681', 'step': 14635, 'epoch': 2} {'type': 'loss', 'content': 0.12104681134223938, 'timestamp': '2025-10-01 04:31:08.105784', 'step': 14636, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:08.136203', 'step': 14636, 'epoch': 2} {'type': 'loss', 'content': 0.16635744273662567, 'timestamp': '2025-10-01 04:31:08.138553', 'step': 14637, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:08.169113', 'step': 14637, 'epoch': 2} {'type': 'loss', 'content': 0.07480420172214508, 'timestamp': '2025-10-01 04:31:08.171380', 'step': 14638, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:08.204683', 'step': 14638, 'epoch': 2} {'type': 'loss', 'content': 0.05686274170875549, 'timestamp': '2025-10-01 04:31:08.207127', 'step': 14639, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:08.238104', 'step': 14639, 'epoch': 2} {'type': 'loss', 'content': 0.11124522238969803, 'timestamp': '2025-10-01 04:31:08.262056', 'step': 14640, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:08.293251', 'step': 14640, 'epoch': 2} {'type': 'loss', 'content': 0.15057654678821564, 'timestamp': '2025-10-01 04:31:08.295644', 'step': 14641, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:08.327799', 'step': 14641, 'epoch': 2} {'type': 'loss', 'content': 0.10694189369678497, 'timestamp': '2025-10-01 04:31:08.330056', 'step': 14642, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:08.360972', 'step': 14642, 'epoch': 2} {'type': 'loss', 'content': 0.05985743924975395, 'timestamp': '2025-10-01 04:31:08.363251', 'step': 14643, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:31:08.394832', 'step': 14643, 'epoch': 2} {'type': 'loss', 'content': 0.14197739958763123, 'timestamp': '2025-10-01 04:31:08.418883', 'step': 14644, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:08.449673', 'step': 14644, 'epoch': 2} {'type': 'loss', 'content': 0.1418832242488861, 'timestamp': '2025-10-01 04:31:08.451955', 'step': 14645, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:08.482451', 'step': 14645, 'epoch': 2} {'type': 'loss', 'content': 0.07080034911632538, 'timestamp': '2025-10-01 04:31:08.484680', 'step': 14646, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:08.515389', 'step': 14646, 'epoch': 2} {'type': 'loss', 'content': 0.15133877098560333, 'timestamp': '2025-10-01 04:31:08.527957', 'step': 14647, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:08.558568', 'step': 14647, 'epoch': 2} {'type': 'loss', 'content': 0.1158411055803299, 'timestamp': '2025-10-01 04:31:08.582503', 'step': 14648, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:08.612948', 'step': 14648, 'epoch': 2} {'type': 'loss', 'content': 0.12160691618919373, 'timestamp': '2025-10-01 04:31:08.615666', 'step': 14649, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:08.648527', 'step': 14649, 'epoch': 2} {'type': 'loss', 'content': 0.06215998902916908, 'timestamp': '2025-10-01 04:31:08.652060', 'step': 14650, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:08.682789', 'step': 14650, 'epoch': 2} {'type': 'loss', 'content': 0.1075705885887146, 'timestamp': '2025-10-01 04:31:08.685280', 'step': 14651, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:08.716056', 'step': 14651, 'epoch': 2} {'type': 'loss', 'content': 0.1275556981563568, 'timestamp': '2025-10-01 04:31:08.740266', 'step': 14652, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:31:08.770938', 'step': 14652, 'epoch': 2} {'type': 'loss', 'content': 0.16371046006679535, 'timestamp': '2025-10-01 04:31:08.773057', 'step': 14653, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:08.805137', 'step': 14653, 'epoch': 2} {'type': 'loss', 'content': 0.11194372922182083, 'timestamp': '2025-10-01 04:31:08.807579', 'step': 14654, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:08.845684', 'step': 14654, 'epoch': 2} {'type': 'loss', 'content': 0.13739155232906342, 'timestamp': '2025-10-01 04:31:08.848343', 'step': 14655, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:31:08.878972', 'step': 14655, 'epoch': 2} {'type': 'loss', 'content': 0.09468787163496017, 'timestamp': '2025-10-01 04:31:08.902680', 'step': 14656, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:08.932694', 'step': 14656, 'epoch': 2} {'type': 'loss', 'content': 0.10264527052640915, 'timestamp': '2025-10-01 04:31:08.934851', 'step': 14657, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:08.973458', 'step': 14657, 'epoch': 2} {'type': 'loss', 'content': 0.03852066770195961, 'timestamp': '2025-10-01 04:31:08.975841', 'step': 14658, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:31:09.006633', 'step': 14658, 'epoch': 2} {'type': 'loss', 'content': 0.07034190744161606, 'timestamp': '2025-10-01 04:31:09.008910', 'step': 14659, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:09.039377', 'step': 14659, 'epoch': 2} {'type': 'loss', 'content': 0.08379776775836945, 'timestamp': '2025-10-01 04:31:09.063094', 'step': 14660, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:09.094351', 'step': 14660, 'epoch': 2} {'type': 'loss', 'content': 0.03942817077040672, 'timestamp': '2025-10-01 04:31:09.096625', 'step': 14661, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:31:09.133414', 'step': 14661, 'epoch': 2} {'type': 'loss', 'content': 0.10556578636169434, 'timestamp': '2025-10-01 04:31:09.137221', 'step': 14662, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:09.168830', 'step': 14662, 'epoch': 2} {'type': 'loss', 'content': 0.18248297274112701, 'timestamp': '2025-10-01 04:31:09.171107', 'step': 14663, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:09.202999', 'step': 14663, 'epoch': 2} {'type': 'loss', 'content': 0.09962733834981918, 'timestamp': '2025-10-01 04:31:09.226657', 'step': 14664, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:09.258003', 'step': 14664, 'epoch': 2} {'type': 'loss', 'content': 0.11556914448738098, 'timestamp': '2025-10-01 04:31:09.260240', 'step': 14665, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:09.291630', 'step': 14665, 'epoch': 2} {'type': 'loss', 'content': 0.07880070805549622, 'timestamp': '2025-10-01 04:31:09.296070', 'step': 14666, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:09.328242', 'step': 14666, 'epoch': 2} {'type': 'loss', 'content': 0.21262851357460022, 'timestamp': '2025-10-01 04:31:09.330401', 'step': 14667, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:09.360918', 'step': 14667, 'epoch': 2} {'type': 'loss', 'content': 0.12820711731910706, 'timestamp': '2025-10-01 04:31:09.384919', 'step': 14668, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:31:09.416791', 'step': 14668, 'epoch': 2} {'type': 'loss', 'content': 0.06101049855351448, 'timestamp': '2025-10-01 04:31:09.419254', 'step': 14669, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:31:09.450949', 'step': 14669, 'epoch': 2} {'type': 'loss', 'content': 0.0852048248052597, 'timestamp': '2025-10-01 04:31:09.455469', 'step': 14670, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:09.486264', 'step': 14670, 'epoch': 2} {'type': 'loss', 'content': 0.039776552468538284, 'timestamp': '2025-10-01 04:31:09.488424', 'step': 14671, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:09.519173', 'step': 14671, 'epoch': 2} {'type': 'loss', 'content': 0.06232151761651039, 'timestamp': '2025-10-01 04:31:09.542857', 'step': 14672, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:09.573410', 'step': 14672, 'epoch': 2} {'type': 'loss', 'content': 0.08622904866933823, 'timestamp': '2025-10-01 04:31:09.575547', 'step': 14673, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:09.606287', 'step': 14673, 'epoch': 2} {'type': 'loss', 'content': 0.12067024409770966, 'timestamp': '2025-10-01 04:31:09.608498', 'step': 14674, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:09.638535', 'step': 14674, 'epoch': 2} {'type': 'loss', 'content': 0.07410374283790588, 'timestamp': '2025-10-01 04:31:09.640620', 'step': 14675, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:09.671009', 'step': 14675, 'epoch': 2} {'type': 'loss', 'content': 0.07372809946537018, 'timestamp': '2025-10-01 04:31:09.694688', 'step': 14676, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:09.724978', 'step': 14676, 'epoch': 2} {'type': 'loss', 'content': 0.10964106023311615, 'timestamp': '2025-10-01 04:31:09.728582', 'step': 14677, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:09.758389', 'step': 14677, 'epoch': 2} {'type': 'loss', 'content': 0.06534276157617569, 'timestamp': '2025-10-01 04:31:09.760787', 'step': 14678, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:09.792515', 'step': 14678, 'epoch': 2} {'type': 'loss', 'content': 0.04547402635216713, 'timestamp': '2025-10-01 04:31:09.794851', 'step': 14679, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:31:09.825283', 'step': 14679, 'epoch': 2} {'type': 'loss', 'content': 0.10779908299446106, 'timestamp': '2025-10-01 04:31:09.849044', 'step': 14680, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:09.880685', 'step': 14680, 'epoch': 2} {'type': 'loss', 'content': 0.08996032178401947, 'timestamp': '2025-10-01 04:31:09.882948', 'step': 14681, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:09.914856', 'step': 14681, 'epoch': 2} {'type': 'loss', 'content': 0.08617981523275375, 'timestamp': '2025-10-01 04:31:09.917039', 'step': 14682, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:09.948787', 'step': 14682, 'epoch': 2} {'type': 'loss', 'content': 0.10794265568256378, 'timestamp': '2025-10-01 04:31:09.951167', 'step': 14683, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:31:09.988792', 'step': 14683, 'epoch': 2} {'type': 'loss', 'content': 0.08010450750589371, 'timestamp': '2025-10-01 04:31:10.012705', 'step': 14684, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:10.042967', 'step': 14684, 'epoch': 2} {'type': 'loss', 'content': 0.11430425196886063, 'timestamp': '2025-10-01 04:31:10.045321', 'step': 14685, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:10.075937', 'step': 14685, 'epoch': 2} {'type': 'loss', 'content': 0.14963674545288086, 'timestamp': '2025-10-01 04:31:10.078175', 'step': 14686, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:10.109132', 'step': 14686, 'epoch': 2} {'type': 'loss', 'content': 0.06459392607212067, 'timestamp': '2025-10-01 04:31:10.111846', 'step': 14687, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:10.143602', 'step': 14687, 'epoch': 2} {'type': 'loss', 'content': 0.050936222076416016, 'timestamp': '2025-10-01 04:31:10.167669', 'step': 14688, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:31:10.200803', 'step': 14688, 'epoch': 2} {'type': 'loss', 'content': 0.09569225460290909, 'timestamp': '2025-10-01 04:31:10.203377', 'step': 14689, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:10.236835', 'step': 14689, 'epoch': 2} {'type': 'loss', 'content': 0.10416607558727264, 'timestamp': '2025-10-01 04:31:10.239361', 'step': 14690, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:10.280085', 'step': 14690, 'epoch': 2} {'type': 'loss', 'content': 0.11056056618690491, 'timestamp': '2025-10-01 04:31:10.282651', 'step': 14691, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:10.315952', 'step': 14691, 'epoch': 2} {'type': 'loss', 'content': 0.07070014625787735, 'timestamp': '2025-10-01 04:31:10.340075', 'step': 14692, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:10.372188', 'step': 14692, 'epoch': 2} {'type': 'loss', 'content': 0.03535722196102142, 'timestamp': '2025-10-01 04:31:10.375038', 'step': 14693, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:10.406810', 'step': 14693, 'epoch': 2} {'type': 'loss', 'content': 0.06792730838060379, 'timestamp': '2025-10-01 04:31:10.409439', 'step': 14694, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:10.440931', 'step': 14694, 'epoch': 2} {'type': 'loss', 'content': 0.014947307296097279, 'timestamp': '2025-10-01 04:31:10.444347', 'step': 14695, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:10.474382', 'step': 14695, 'epoch': 2} {'type': 'loss', 'content': 0.03496653586626053, 'timestamp': '2025-10-01 04:31:10.498243', 'step': 14696, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:10.531362', 'step': 14696, 'epoch': 2} {'type': 'loss', 'content': 0.06969299167394638, 'timestamp': '2025-10-01 04:31:10.533847', 'step': 14697, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:10.567791', 'step': 14697, 'epoch': 2} {'type': 'loss', 'content': 0.032987043261528015, 'timestamp': '2025-10-01 04:31:10.570603', 'step': 14698, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:10.603271', 'step': 14698, 'epoch': 2} {'type': 'loss', 'content': 0.0983370691537857, 'timestamp': '2025-10-01 04:31:10.606052', 'step': 14699, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:31:10.642967', 'step': 14699, 'epoch': 2} {'type': 'loss', 'content': 0.14363975822925568, 'timestamp': '2025-10-01 04:31:10.667107', 'step': 14700, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:31:10.718135', 'step': 14700, 'epoch': 2} {'type': 'loss', 'content': 0.08515793085098267, 'timestamp': '2025-10-01 04:31:10.720742', 'step': 14701, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:10.768297', 'step': 14701, 'epoch': 2} {'type': 'loss', 'content': 0.0373409129679203, 'timestamp': '2025-10-01 04:31:10.770760', 'step': 14702, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:10.812419', 'step': 14702, 'epoch': 2} {'type': 'loss', 'content': 0.18021605908870697, 'timestamp': '2025-10-01 04:31:10.815136', 'step': 14703, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:10.861154', 'step': 14703, 'epoch': 2} {'type': 'loss', 'content': 0.055634766817092896, 'timestamp': '2025-10-01 04:31:10.885414', 'step': 14704, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:10.918382', 'step': 14704, 'epoch': 2} {'type': 'loss', 'content': 0.14048470556735992, 'timestamp': '2025-10-01 04:31:10.920947', 'step': 14705, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:10.967939', 'step': 14705, 'epoch': 2} {'type': 'loss', 'content': 0.10821665823459625, 'timestamp': '2025-10-01 04:31:10.970650', 'step': 14706, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:11.010524', 'step': 14706, 'epoch': 2} {'type': 'loss', 'content': 0.06246061623096466, 'timestamp': '2025-10-01 04:31:11.012946', 'step': 14707, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:11.053713', 'step': 14707, 'epoch': 2} {'type': 'loss', 'content': 0.1459629237651825, 'timestamp': '2025-10-01 04:31:11.077506', 'step': 14708, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:31:11.128099', 'step': 14708, 'epoch': 2} {'type': 'loss', 'content': 0.08433397859334946, 'timestamp': '2025-10-01 04:31:11.130530', 'step': 14709, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:11.170602', 'step': 14709, 'epoch': 2} {'type': 'loss', 'content': 0.067967988550663, 'timestamp': '2025-10-01 04:31:11.172924', 'step': 14710, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:11.217703', 'step': 14710, 'epoch': 2} {'type': 'loss', 'content': 0.059021979570388794, 'timestamp': '2025-10-01 04:31:11.220428', 'step': 14711, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:11.270500', 'step': 14711, 'epoch': 2} {'type': 'loss', 'content': 0.11655587702989578, 'timestamp': '2025-10-01 04:31:11.294748', 'step': 14712, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:11.341709', 'step': 14712, 'epoch': 2} {'type': 'loss', 'content': 0.12697842717170715, 'timestamp': '2025-10-01 04:31:11.344100', 'step': 14713, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:31:11.378118', 'step': 14713, 'epoch': 2} {'type': 'loss', 'content': 0.1012067124247551, 'timestamp': '2025-10-01 04:31:11.381052', 'step': 14714, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:11.412748', 'step': 14714, 'epoch': 2} {'type': 'loss', 'content': 0.07636068761348724, 'timestamp': '2025-10-01 04:31:11.415016', 'step': 14715, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:11.466177', 'step': 14715, 'epoch': 2} {'type': 'loss', 'content': 0.10734687000513077, 'timestamp': '2025-10-01 04:31:11.490066', 'step': 14716, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:11.521988', 'step': 14716, 'epoch': 2} {'type': 'loss', 'content': 0.07904186844825745, 'timestamp': '2025-10-01 04:31:11.524145', 'step': 14717, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:11.559404', 'step': 14717, 'epoch': 2} {'type': 'loss', 'content': 0.08220814168453217, 'timestamp': '2025-10-01 04:31:11.561641', 'step': 14718, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:11.597113', 'step': 14718, 'epoch': 2} {'type': 'loss', 'content': 0.14390210807323456, 'timestamp': '2025-10-01 04:31:11.599425', 'step': 14719, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:11.639442', 'step': 14719, 'epoch': 2} {'type': 'loss', 'content': 0.17397546768188477, 'timestamp': '2025-10-01 04:31:11.663326', 'step': 14720, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:11.711659', 'step': 14720, 'epoch': 2} {'type': 'loss', 'content': 0.1671324074268341, 'timestamp': '2025-10-01 04:31:11.713989', 'step': 14721, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:11.758891', 'step': 14721, 'epoch': 2} {'type': 'loss', 'content': 0.09011680632829666, 'timestamp': '2025-10-01 04:31:11.761201', 'step': 14722, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:11.805930', 'step': 14722, 'epoch': 2} {'type': 'loss', 'content': 0.09021167457103729, 'timestamp': '2025-10-01 04:31:11.808199', 'step': 14723, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:11.841111', 'step': 14723, 'epoch': 2} {'type': 'loss', 'content': 0.06096339970827103, 'timestamp': '2025-10-01 04:31:11.864974', 'step': 14724, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:11.904257', 'step': 14724, 'epoch': 2} {'type': 'loss', 'content': 0.184205561876297, 'timestamp': '2025-10-01 04:31:11.906398', 'step': 14725, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:11.938847', 'step': 14725, 'epoch': 2} {'type': 'loss', 'content': 0.08665714412927628, 'timestamp': '2025-10-01 04:31:11.941155', 'step': 14726, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:11.991669', 'step': 14726, 'epoch': 2} {'type': 'loss', 'content': 0.03183198347687721, 'timestamp': '2025-10-01 04:31:11.995765', 'step': 14727, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:12.044317', 'step': 14727, 'epoch': 2} {'type': 'loss', 'content': 0.13086245954036713, 'timestamp': '2025-10-01 04:31:12.068161', 'step': 14728, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:12.107052', 'step': 14728, 'epoch': 2} {'type': 'loss', 'content': 0.11137386411428452, 'timestamp': '2025-10-01 04:31:12.110032', 'step': 14729, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:12.143199', 'step': 14729, 'epoch': 2} {'type': 'loss', 'content': 0.09827752411365509, 'timestamp': '2025-10-01 04:31:12.145446', 'step': 14730, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:12.184901', 'step': 14730, 'epoch': 2} {'type': 'loss', 'content': 0.12929491698741913, 'timestamp': '2025-10-01 04:31:12.187109', 'step': 14731, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:31:12.231550', 'step': 14731, 'epoch': 2} {'type': 'loss', 'content': 0.13844646513462067, 'timestamp': '2025-10-01 04:31:12.255237', 'step': 14732, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:12.297010', 'step': 14732, 'epoch': 2} {'type': 'loss', 'content': 0.12930135428905487, 'timestamp': '2025-10-01 04:31:12.299259', 'step': 14733, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:12.342308', 'step': 14733, 'epoch': 2} {'type': 'loss', 'content': 0.07357702404260635, 'timestamp': '2025-10-01 04:31:12.345805', 'step': 14734, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:12.396428', 'step': 14734, 'epoch': 2} {'type': 'loss', 'content': 0.04470669478178024, 'timestamp': '2025-10-01 04:31:12.398573', 'step': 14735, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:12.447679', 'step': 14735, 'epoch': 2} {'type': 'loss', 'content': 0.1595936268568039, 'timestamp': '2025-10-01 04:31:12.471402', 'step': 14736, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:12.517334', 'step': 14736, 'epoch': 2} {'type': 'loss', 'content': 0.04501812532544136, 'timestamp': '2025-10-01 04:31:12.519561', 'step': 14737, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:31:12.560045', 'step': 14737, 'epoch': 2} {'type': 'loss', 'content': 0.1364808976650238, 'timestamp': '2025-10-01 04:31:12.565239', 'step': 14738, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:12.628357', 'step': 14738, 'epoch': 2} {'type': 'loss', 'content': 0.05465796962380409, 'timestamp': '2025-10-01 04:31:12.644249', 'step': 14739, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:12.694974', 'step': 14739, 'epoch': 2} {'type': 'loss', 'content': 0.06973225623369217, 'timestamp': '2025-10-01 04:31:12.719500', 'step': 14740, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:12.771787', 'step': 14740, 'epoch': 2} {'type': 'loss', 'content': 0.20435796678066254, 'timestamp': '2025-10-01 04:31:12.774345', 'step': 14741, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:12.813360', 'step': 14741, 'epoch': 2} {'type': 'loss', 'content': 0.05321922153234482, 'timestamp': '2025-10-01 04:31:12.815846', 'step': 14742, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:12.850875', 'step': 14742, 'epoch': 2} {'type': 'loss', 'content': 0.10320373624563217, 'timestamp': '2025-10-01 04:31:12.853074', 'step': 14743, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:12.902018', 'step': 14743, 'epoch': 2} {'type': 'loss', 'content': 0.002371621783822775, 'timestamp': '2025-10-01 04:31:12.925904', 'step': 14744, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:12.964714', 'step': 14744, 'epoch': 2} {'type': 'loss', 'content': 0.030213894322514534, 'timestamp': '2025-10-01 04:31:12.966860', 'step': 14745, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:31:13.014576', 'step': 14745, 'epoch': 2} {'type': 'loss', 'content': 0.10139554738998413, 'timestamp': '2025-10-01 04:31:13.017608', 'step': 14746, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:13.060378', 'step': 14746, 'epoch': 2} {'type': 'loss', 'content': 0.08583907783031464, 'timestamp': '2025-10-01 04:31:13.062683', 'step': 14747, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:13.102440', 'step': 14747, 'epoch': 2} {'type': 'loss', 'content': 0.08759672194719315, 'timestamp': '2025-10-01 04:31:13.126194', 'step': 14748, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:31:13.167450', 'step': 14748, 'epoch': 2} {'type': 'loss', 'content': 0.03428276255726814, 'timestamp': '2025-10-01 04:31:13.170127', 'step': 14749, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:13.205376', 'step': 14749, 'epoch': 2} {'type': 'loss', 'content': 0.10192318260669708, 'timestamp': '2025-10-01 04:31:13.207566', 'step': 14750, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:13.243434', 'step': 14750, 'epoch': 2} {'type': 'loss', 'content': 0.1075872927904129, 'timestamp': '2025-10-01 04:31:13.245539', 'step': 14751, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:13.278063', 'step': 14751, 'epoch': 2} {'type': 'loss', 'content': 0.08612222969532013, 'timestamp': '2025-10-01 04:31:13.301817', 'step': 14752, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:13.337949', 'step': 14752, 'epoch': 2} {'type': 'loss', 'content': 0.10185809433460236, 'timestamp': '2025-10-01 04:31:13.340270', 'step': 14753, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:13.380096', 'step': 14753, 'epoch': 2} {'type': 'loss', 'content': 0.06475456058979034, 'timestamp': '2025-10-01 04:31:13.383725', 'step': 14754, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:13.415973', 'step': 14754, 'epoch': 2} {'type': 'loss', 'content': 0.0840241014957428, 'timestamp': '2025-10-01 04:31:13.420119', 'step': 14755, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:13.454890', 'step': 14755, 'epoch': 2} {'type': 'loss', 'content': 0.07936286181211472, 'timestamp': '2025-10-01 04:31:13.479055', 'step': 14756, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:13.511696', 'step': 14756, 'epoch': 2} {'type': 'loss', 'content': 0.11339735984802246, 'timestamp': '2025-10-01 04:31:13.513978', 'step': 14757, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:13.544843', 'step': 14757, 'epoch': 2} {'type': 'loss', 'content': 0.12494805455207825, 'timestamp': '2025-10-01 04:31:13.547194', 'step': 14758, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:13.579176', 'step': 14758, 'epoch': 2} {'type': 'loss', 'content': 0.06552919745445251, 'timestamp': '2025-10-01 04:31:13.581706', 'step': 14759, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:13.614981', 'step': 14759, 'epoch': 2} {'type': 'loss', 'content': 0.060055751353502274, 'timestamp': '2025-10-01 04:31:13.638804', 'step': 14760, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:13.671951', 'step': 14760, 'epoch': 2} {'type': 'loss', 'content': 0.1164499968290329, 'timestamp': '2025-10-01 04:31:13.674243', 'step': 14761, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:13.706587', 'step': 14761, 'epoch': 2} {'type': 'loss', 'content': 0.12752757966518402, 'timestamp': '2025-10-01 04:31:13.708779', 'step': 14762, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:31:13.740677', 'step': 14762, 'epoch': 2} {'type': 'loss', 'content': 0.12871821224689484, 'timestamp': '2025-10-01 04:31:13.743126', 'step': 14763, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:13.779688', 'step': 14763, 'epoch': 2} {'type': 'loss', 'content': 0.057030074298381805, 'timestamp': '2025-10-01 04:31:13.804047', 'step': 14764, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:31:13.843847', 'step': 14764, 'epoch': 2} {'type': 'loss', 'content': 0.07844951748847961, 'timestamp': '2025-10-01 04:31:13.846507', 'step': 14765, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:13.877687', 'step': 14765, 'epoch': 2} {'type': 'loss', 'content': 0.09714915603399277, 'timestamp': '2025-10-01 04:31:13.879901', 'step': 14766, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:13.913881', 'step': 14766, 'epoch': 2} {'type': 'loss', 'content': 0.06718961149454117, 'timestamp': '2025-10-01 04:31:13.920498', 'step': 14767, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:13.955521', 'step': 14767, 'epoch': 2} {'type': 'loss', 'content': 0.10610846430063248, 'timestamp': '2025-10-01 04:31:13.979361', 'step': 14768, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:14.025734', 'step': 14768, 'epoch': 2} {'type': 'loss', 'content': 0.10740908980369568, 'timestamp': '2025-10-01 04:31:14.028523', 'step': 14769, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:14.064757', 'step': 14769, 'epoch': 2} {'type': 'loss', 'content': 0.10275787860155106, 'timestamp': '2025-10-01 04:31:14.067178', 'step': 14770, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:14.106859', 'step': 14770, 'epoch': 2} {'type': 'loss', 'content': 0.07792458683252335, 'timestamp': '2025-10-01 04:31:14.109201', 'step': 14771, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:14.146544', 'step': 14771, 'epoch': 2} {'type': 'loss', 'content': 0.09383927285671234, 'timestamp': '2025-10-01 04:31:14.170355', 'step': 14772, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:31:14.201334', 'step': 14772, 'epoch': 2} {'type': 'loss', 'content': 0.10457869619131088, 'timestamp': '2025-10-01 04:31:14.213796', 'step': 14773, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:14.245746', 'step': 14773, 'epoch': 2} {'type': 'loss', 'content': 0.11465369910001755, 'timestamp': '2025-10-01 04:31:14.247810', 'step': 14774, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:14.288869', 'step': 14774, 'epoch': 2} {'type': 'loss', 'content': 0.12896382808685303, 'timestamp': '2025-10-01 04:31:14.290895', 'step': 14775, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:14.321521', 'step': 14775, 'epoch': 2} {'type': 'loss', 'content': 0.056905485689640045, 'timestamp': '2025-10-01 04:31:14.345419', 'step': 14776, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:31:14.376903', 'step': 14776, 'epoch': 2} {'type': 'loss', 'content': 0.12388081848621368, 'timestamp': '2025-10-01 04:31:14.379893', 'step': 14777, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:31:14.411771', 'step': 14777, 'epoch': 2} {'type': 'loss', 'content': 0.14695678651332855, 'timestamp': '2025-10-01 04:31:14.414348', 'step': 14778, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:14.446911', 'step': 14778, 'epoch': 2} {'type': 'loss', 'content': 0.0771409347653389, 'timestamp': '2025-10-01 04:31:14.449215', 'step': 14779, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:14.483055', 'step': 14779, 'epoch': 2} {'type': 'loss', 'content': 0.12052308768033981, 'timestamp': '2025-10-01 04:31:14.506587', 'step': 14780, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:14.553284', 'step': 14780, 'epoch': 2} {'type': 'loss', 'content': 0.24266603589057922, 'timestamp': '2025-10-01 04:31:14.555445', 'step': 14781, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:14.599444', 'step': 14781, 'epoch': 2} {'type': 'loss', 'content': 0.10528162866830826, 'timestamp': '2025-10-01 04:31:14.601605', 'step': 14782, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:14.634737', 'step': 14782, 'epoch': 2} {'type': 'loss', 'content': 0.08938337117433548, 'timestamp': '2025-10-01 04:31:14.636826', 'step': 14783, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:14.668138', 'step': 14783, 'epoch': 2} {'type': 'loss', 'content': 0.02819567546248436, 'timestamp': '2025-10-01 04:31:14.692075', 'step': 14784, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:14.744849', 'step': 14784, 'epoch': 2} {'type': 'loss', 'content': 0.09118841588497162, 'timestamp': '2025-10-01 04:31:14.747270', 'step': 14785, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:14.778424', 'step': 14785, 'epoch': 2} {'type': 'loss', 'content': 0.10412262380123138, 'timestamp': '2025-10-01 04:31:14.780628', 'step': 14786, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:14.811075', 'step': 14786, 'epoch': 2} {'type': 'loss', 'content': 0.08260294049978256, 'timestamp': '2025-10-01 04:31:14.813800', 'step': 14787, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:14.843812', 'step': 14787, 'epoch': 2} {'type': 'loss', 'content': 0.09440900385379791, 'timestamp': '2025-10-01 04:31:14.867562', 'step': 14788, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:14.898181', 'step': 14788, 'epoch': 2} {'type': 'loss', 'content': 0.10699702799320221, 'timestamp': '2025-10-01 04:31:14.900371', 'step': 14789, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:14.935822', 'step': 14789, 'epoch': 2} {'type': 'loss', 'content': 0.18072038888931274, 'timestamp': '2025-10-01 04:31:14.937989', 'step': 14790, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:14.969065', 'step': 14790, 'epoch': 2} {'type': 'loss', 'content': 0.07144630700349808, 'timestamp': '2025-10-01 04:31:14.971155', 'step': 14791, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:15.002088', 'step': 14791, 'epoch': 2} {'type': 'loss', 'content': 0.13303814828395844, 'timestamp': '2025-10-01 04:31:15.025964', 'step': 14792, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:15.057520', 'step': 14792, 'epoch': 2} {'type': 'loss', 'content': 0.09866683930158615, 'timestamp': '2025-10-01 04:31:15.059639', 'step': 14793, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:15.090190', 'step': 14793, 'epoch': 2} {'type': 'loss', 'content': 0.050322920083999634, 'timestamp': '2025-10-01 04:31:15.092194', 'step': 14794, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:15.122569', 'step': 14794, 'epoch': 2} {'type': 'loss', 'content': 0.07164476066827774, 'timestamp': '2025-10-01 04:31:15.126306', 'step': 14795, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:15.158050', 'step': 14795, 'epoch': 2} {'type': 'loss', 'content': 0.08762400597333908, 'timestamp': '2025-10-01 04:31:15.181672', 'step': 14796, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:15.213043', 'step': 14796, 'epoch': 2} {'type': 'loss', 'content': 0.15444254875183105, 'timestamp': '2025-10-01 04:31:15.215428', 'step': 14797, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:15.247366', 'step': 14797, 'epoch': 2} {'type': 'loss', 'content': 0.06048767641186714, 'timestamp': '2025-10-01 04:31:15.250171', 'step': 14798, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:31:15.283798', 'step': 14798, 'epoch': 2} {'type': 'loss', 'content': 0.11540839821100235, 'timestamp': '2025-10-01 04:31:15.287079', 'step': 14799, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:15.320660', 'step': 14799, 'epoch': 2} {'type': 'loss', 'content': 0.09976019710302353, 'timestamp': '2025-10-01 04:31:15.344928', 'step': 14800, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:15.378315', 'step': 14800, 'epoch': 2} {'type': 'loss', 'content': 0.14806130528450012, 'timestamp': '2025-10-01 04:31:15.380762', 'step': 14801, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:15.414287', 'step': 14801, 'epoch': 2} {'type': 'loss', 'content': 0.15153531730175018, 'timestamp': '2025-10-01 04:31:15.416517', 'step': 14802, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:15.454551', 'step': 14802, 'epoch': 2} {'type': 'loss', 'content': 0.10404066741466522, 'timestamp': '2025-10-01 04:31:15.456779', 'step': 14803, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:15.492529', 'step': 14803, 'epoch': 2} {'type': 'loss', 'content': 0.15691834688186646, 'timestamp': '2025-10-01 04:31:15.516651', 'step': 14804, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:15.558748', 'step': 14804, 'epoch': 2} {'type': 'loss', 'content': 0.0594472661614418, 'timestamp': '2025-10-01 04:31:15.561203', 'step': 14805, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:15.601365', 'step': 14805, 'epoch': 2} {'type': 'loss', 'content': 0.12229023873806, 'timestamp': '2025-10-01 04:31:15.603824', 'step': 14806, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:15.645197', 'step': 14806, 'epoch': 2} {'type': 'loss', 'content': 0.07689894735813141, 'timestamp': '2025-10-01 04:31:15.647458', 'step': 14807, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:15.695421', 'step': 14807, 'epoch': 2} {'type': 'loss', 'content': 0.08676596730947495, 'timestamp': '2025-10-01 04:31:15.719227', 'step': 14808, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:15.751796', 'step': 14808, 'epoch': 2} {'type': 'loss', 'content': 0.057553648948669434, 'timestamp': '2025-10-01 04:31:15.754016', 'step': 14809, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:15.792682', 'step': 14809, 'epoch': 2} {'type': 'loss', 'content': 0.08543417602777481, 'timestamp': '2025-10-01 04:31:15.798823', 'step': 14810, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:15.832235', 'step': 14810, 'epoch': 2} {'type': 'loss', 'content': 0.08743339031934738, 'timestamp': '2025-10-01 04:31:15.834672', 'step': 14811, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:15.897620', 'step': 14811, 'epoch': 2} {'type': 'loss', 'content': 0.12724512815475464, 'timestamp': '2025-10-01 04:31:15.921410', 'step': 14812, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:15.975861', 'step': 14812, 'epoch': 2} {'type': 'loss', 'content': 0.11127497255802155, 'timestamp': '2025-10-01 04:31:15.978342', 'step': 14813, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:16.013226', 'step': 14813, 'epoch': 2} {'type': 'loss', 'content': 0.07697691768407822, 'timestamp': '2025-10-01 04:31:16.029214', 'step': 14814, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:16.063333', 'step': 14814, 'epoch': 2} {'type': 'loss', 'content': 0.13876143097877502, 'timestamp': '2025-10-01 04:31:16.065619', 'step': 14815, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:31:16.106687', 'step': 14815, 'epoch': 2} {'type': 'loss', 'content': 0.09204563498497009, 'timestamp': '2025-10-01 04:31:16.130285', 'step': 14816, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:16.163360', 'step': 14816, 'epoch': 2} {'type': 'loss', 'content': 0.17376849055290222, 'timestamp': '2025-10-01 04:31:16.165567', 'step': 14817, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:31:16.201073', 'step': 14817, 'epoch': 2} {'type': 'loss', 'content': 0.040806133300065994, 'timestamp': '2025-10-01 04:31:16.203496', 'step': 14818, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:16.236524', 'step': 14818, 'epoch': 2} {'type': 'loss', 'content': 0.1263365000486374, 'timestamp': '2025-10-01 04:31:16.238735', 'step': 14819, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:16.276430', 'step': 14819, 'epoch': 2} {'type': 'loss', 'content': 0.07886186987161636, 'timestamp': '2025-10-01 04:31:16.300678', 'step': 14820, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:16.337864', 'step': 14820, 'epoch': 2} {'type': 'loss', 'content': 0.1433626115322113, 'timestamp': '2025-10-01 04:31:16.340155', 'step': 14821, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:16.370897', 'step': 14821, 'epoch': 2} {'type': 'loss', 'content': 0.09227684885263443, 'timestamp': '2025-10-01 04:31:16.373126', 'step': 14822, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:16.404123', 'step': 14822, 'epoch': 2} {'type': 'loss', 'content': 0.02747858129441738, 'timestamp': '2025-10-01 04:31:16.406332', 'step': 14823, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:16.437440', 'step': 14823, 'epoch': 2} {'type': 'loss', 'content': 0.08350441604852676, 'timestamp': '2025-10-01 04:31:16.461205', 'step': 14824, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:16.493247', 'step': 14824, 'epoch': 2} {'type': 'loss', 'content': 0.1542207896709442, 'timestamp': '2025-10-01 04:31:16.495637', 'step': 14825, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:16.527425', 'step': 14825, 'epoch': 2} {'type': 'loss', 'content': 0.07331445068120956, 'timestamp': '2025-10-01 04:31:16.529641', 'step': 14826, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:16.560886', 'step': 14826, 'epoch': 2} {'type': 'loss', 'content': 0.046393007040023804, 'timestamp': '2025-10-01 04:31:16.563304', 'step': 14827, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:16.594990', 'step': 14827, 'epoch': 2} {'type': 'loss', 'content': 0.129738911986351, 'timestamp': '2025-10-01 04:31:16.619005', 'step': 14828, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:16.649886', 'step': 14828, 'epoch': 2} {'type': 'loss', 'content': 0.11276815086603165, 'timestamp': '2025-10-01 04:31:16.652216', 'step': 14829, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:16.683358', 'step': 14829, 'epoch': 2} {'type': 'loss', 'content': 0.10179375112056732, 'timestamp': '2025-10-01 04:31:16.686086', 'step': 14830, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:31:16.717421', 'step': 14830, 'epoch': 2} {'type': 'loss', 'content': 0.061774756759405136, 'timestamp': '2025-10-01 04:31:16.728455', 'step': 14831, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:16.767708', 'step': 14831, 'epoch': 2} {'type': 'loss', 'content': 0.09938981384038925, 'timestamp': '2025-10-01 04:31:16.793260', 'step': 14832, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:16.824974', 'step': 14832, 'epoch': 2} {'type': 'loss', 'content': 0.13043446838855743, 'timestamp': '2025-10-01 04:31:16.827052', 'step': 14833, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:31:16.857762', 'step': 14833, 'epoch': 2} {'type': 'loss', 'content': 0.1516626477241516, 'timestamp': '2025-10-01 04:31:16.860159', 'step': 14834, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:16.900632', 'step': 14834, 'epoch': 2} {'type': 'loss', 'content': 0.07894349843263626, 'timestamp': '2025-10-01 04:31:16.902994', 'step': 14835, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:16.936306', 'step': 14835, 'epoch': 2} {'type': 'loss', 'content': 0.15346862375736237, 'timestamp': '2025-10-01 04:31:16.960705', 'step': 14836, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:16.991518', 'step': 14836, 'epoch': 2} {'type': 'loss', 'content': 0.09368741512298584, 'timestamp': '2025-10-01 04:31:16.993783', 'step': 14837, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:17.025397', 'step': 14837, 'epoch': 2} {'type': 'loss', 'content': 0.06478863209486008, 'timestamp': '2025-10-01 04:31:17.027665', 'step': 14838, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:17.072975', 'step': 14838, 'epoch': 2} {'type': 'loss', 'content': 0.043090708553791046, 'timestamp': '2025-10-01 04:31:17.075534', 'step': 14839, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:17.108252', 'step': 14839, 'epoch': 2} {'type': 'loss', 'content': 0.10251495242118835, 'timestamp': '2025-10-01 04:31:17.132066', 'step': 14840, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:17.167770', 'step': 14840, 'epoch': 2} {'type': 'loss', 'content': 0.13950493931770325, 'timestamp': '2025-10-01 04:31:17.169861', 'step': 14841, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:17.213928', 'step': 14841, 'epoch': 2} {'type': 'loss', 'content': 0.08008978515863419, 'timestamp': '2025-10-01 04:31:17.216350', 'step': 14842, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:17.251845', 'step': 14842, 'epoch': 2} {'type': 'loss', 'content': 0.08473019301891327, 'timestamp': '2025-10-01 04:31:17.254451', 'step': 14843, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:17.285981', 'step': 14843, 'epoch': 2} {'type': 'loss', 'content': 0.09477678686380386, 'timestamp': '2025-10-01 04:31:17.309782', 'step': 14844, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:17.342176', 'step': 14844, 'epoch': 2} {'type': 'loss', 'content': 0.13420970737934113, 'timestamp': '2025-10-01 04:31:17.344495', 'step': 14845, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:17.376977', 'step': 14845, 'epoch': 2} {'type': 'loss', 'content': 0.06420822441577911, 'timestamp': '2025-10-01 04:31:17.379584', 'step': 14846, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:17.413192', 'step': 14846, 'epoch': 2} {'type': 'loss', 'content': 0.03301459178328514, 'timestamp': '2025-10-01 04:31:17.415989', 'step': 14847, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:17.447812', 'step': 14847, 'epoch': 2} {'type': 'loss', 'content': 0.07924707233905792, 'timestamp': '2025-10-01 04:31:17.471590', 'step': 14848, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:17.518611', 'step': 14848, 'epoch': 2} {'type': 'loss', 'content': 0.12432581931352615, 'timestamp': '2025-10-01 04:31:17.521059', 'step': 14849, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:17.570057', 'step': 14849, 'epoch': 2} {'type': 'loss', 'content': 0.07751975208520889, 'timestamp': '2025-10-01 04:31:17.572274', 'step': 14850, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:17.603179', 'step': 14850, 'epoch': 2} {'type': 'loss', 'content': 0.1313779652118683, 'timestamp': '2025-10-01 04:31:17.605463', 'step': 14851, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:17.636940', 'step': 14851, 'epoch': 2} {'type': 'loss', 'content': 0.118777796626091, 'timestamp': '2025-10-01 04:31:17.660877', 'step': 14852, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:31:17.691419', 'step': 14852, 'epoch': 2} {'type': 'loss', 'content': 0.1288388967514038, 'timestamp': '2025-10-01 04:31:17.693745', 'step': 14853, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:17.725998', 'step': 14853, 'epoch': 2} {'type': 'loss', 'content': 0.11012449115514755, 'timestamp': '2025-10-01 04:31:17.739029', 'step': 14854, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:17.770897', 'step': 14854, 'epoch': 2} {'type': 'loss', 'content': 0.049209315329790115, 'timestamp': '2025-10-01 04:31:17.773175', 'step': 14855, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:17.804553', 'step': 14855, 'epoch': 2} {'type': 'loss', 'content': 0.06825844943523407, 'timestamp': '2025-10-01 04:31:17.828437', 'step': 14856, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:31:17.858890', 'step': 14856, 'epoch': 2} {'type': 'loss', 'content': 0.07483410835266113, 'timestamp': '2025-10-01 04:31:17.861232', 'step': 14857, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:17.891953', 'step': 14857, 'epoch': 2} {'type': 'loss', 'content': 0.055055517703294754, 'timestamp': '2025-10-01 04:31:17.896357', 'step': 14858, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:17.929685', 'step': 14858, 'epoch': 2} {'type': 'loss', 'content': 0.0694604143500328, 'timestamp': '2025-10-01 04:31:17.931989', 'step': 14859, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:17.963944', 'step': 14859, 'epoch': 2} {'type': 'loss', 'content': 0.09796358644962311, 'timestamp': '2025-10-01 04:31:17.988854', 'step': 14860, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:31:18.039617', 'step': 14860, 'epoch': 2} {'type': 'loss', 'content': 0.14964109659194946, 'timestamp': '2025-10-01 04:31:18.041954', 'step': 14861, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:18.072338', 'step': 14861, 'epoch': 2} {'type': 'loss', 'content': 0.11418173462152481, 'timestamp': '2025-10-01 04:31:18.074637', 'step': 14862, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:18.112720', 'step': 14862, 'epoch': 2} {'type': 'loss', 'content': 0.10860978811979294, 'timestamp': '2025-10-01 04:31:18.115946', 'step': 14863, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:18.146694', 'step': 14863, 'epoch': 2} {'type': 'loss', 'content': 0.11369568109512329, 'timestamp': '2025-10-01 04:31:18.170521', 'step': 14864, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:31:18.200993', 'step': 14864, 'epoch': 2} {'type': 'loss', 'content': 0.07747850567102432, 'timestamp': '2025-10-01 04:31:18.203702', 'step': 14865, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:18.234024', 'step': 14865, 'epoch': 2} {'type': 'loss', 'content': 0.13845069706439972, 'timestamp': '2025-10-01 04:31:18.237331', 'step': 14866, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:18.271557', 'step': 14866, 'epoch': 2} {'type': 'loss', 'content': 0.10162921994924545, 'timestamp': '2025-10-01 04:31:18.274330', 'step': 14867, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:18.305319', 'step': 14867, 'epoch': 2} {'type': 'loss', 'content': 0.24674063920974731, 'timestamp': '2025-10-01 04:31:18.329745', 'step': 14868, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:18.366158', 'step': 14868, 'epoch': 2} {'type': 'loss', 'content': 0.1720574051141739, 'timestamp': '2025-10-01 04:31:18.368751', 'step': 14869, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:18.404659', 'step': 14869, 'epoch': 2} {'type': 'loss', 'content': 0.06673909723758698, 'timestamp': '2025-10-01 04:31:18.407450', 'step': 14870, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:18.443023', 'step': 14870, 'epoch': 2} {'type': 'loss', 'content': 0.03650817647576332, 'timestamp': '2025-10-01 04:31:18.445287', 'step': 14871, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:18.478718', 'step': 14871, 'epoch': 2} {'type': 'loss', 'content': 0.06819888949394226, 'timestamp': '2025-10-01 04:31:18.503231', 'step': 14872, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:31:18.545497', 'step': 14872, 'epoch': 2} {'type': 'loss', 'content': 0.09550601243972778, 'timestamp': '2025-10-01 04:31:18.548178', 'step': 14873, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:18.581293', 'step': 14873, 'epoch': 2} {'type': 'loss', 'content': 0.03172667324542999, 'timestamp': '2025-10-01 04:31:18.583630', 'step': 14874, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:18.614609', 'step': 14874, 'epoch': 2} {'type': 'loss', 'content': 0.03137890622019768, 'timestamp': '2025-10-01 04:31:18.616915', 'step': 14875, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:18.647600', 'step': 14875, 'epoch': 2} {'type': 'loss', 'content': 0.047873642295598984, 'timestamp': '2025-10-01 04:31:18.671558', 'step': 14876, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:31:18.702075', 'step': 14876, 'epoch': 2} {'type': 'loss', 'content': 0.1372777670621872, 'timestamp': '2025-10-01 04:31:18.704574', 'step': 14877, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:18.735280', 'step': 14877, 'epoch': 2} {'type': 'loss', 'content': 0.06355228275060654, 'timestamp': '2025-10-01 04:31:18.737714', 'step': 14878, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:18.769991', 'step': 14878, 'epoch': 2} {'type': 'loss', 'content': 0.1353234052658081, 'timestamp': '2025-10-01 04:31:18.772793', 'step': 14879, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:18.803830', 'step': 14879, 'epoch': 2} {'type': 'loss', 'content': 0.11052678525447845, 'timestamp': '2025-10-01 04:31:18.828042', 'step': 14880, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:31:18.860670', 'step': 14880, 'epoch': 2} {'type': 'loss', 'content': 0.030973196029663086, 'timestamp': '2025-10-01 04:31:18.863119', 'step': 14881, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:18.894243', 'step': 14881, 'epoch': 2} {'type': 'loss', 'content': 0.11870913207530975, 'timestamp': '2025-10-01 04:31:18.896510', 'step': 14882, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:18.927151', 'step': 14882, 'epoch': 2} {'type': 'loss', 'content': 0.1555313915014267, 'timestamp': '2025-10-01 04:31:18.930223', 'step': 14883, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:18.963071', 'step': 14883, 'epoch': 2} {'type': 'loss', 'content': 0.042850688099861145, 'timestamp': '2025-10-01 04:31:18.986679', 'step': 14884, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:19.017619', 'step': 14884, 'epoch': 2} {'type': 'loss', 'content': 0.14164301753044128, 'timestamp': '2025-10-01 04:31:19.020518', 'step': 14885, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:19.052724', 'step': 14885, 'epoch': 2} {'type': 'loss', 'content': 0.04460463300347328, 'timestamp': '2025-10-01 04:31:19.055398', 'step': 14886, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:19.086165', 'step': 14886, 'epoch': 2} {'type': 'loss', 'content': 0.06813749670982361, 'timestamp': '2025-10-01 04:31:19.089013', 'step': 14887, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:19.120612', 'step': 14887, 'epoch': 2} {'type': 'loss', 'content': 0.050506602972745895, 'timestamp': '2025-10-01 04:31:19.144432', 'step': 14888, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:31:19.175902', 'step': 14888, 'epoch': 2} {'type': 'loss', 'content': 0.176178976893425, 'timestamp': '2025-10-01 04:31:19.178652', 'step': 14889, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:19.209310', 'step': 14889, 'epoch': 2} {'type': 'loss', 'content': 0.07397320866584778, 'timestamp': '2025-10-01 04:31:19.212122', 'step': 14890, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:19.242969', 'step': 14890, 'epoch': 2} {'type': 'loss', 'content': 0.17097891867160797, 'timestamp': '2025-10-01 04:31:19.245981', 'step': 14891, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:31:19.277382', 'step': 14891, 'epoch': 2} {'type': 'loss', 'content': 0.14607366919517517, 'timestamp': '2025-10-01 04:31:19.301077', 'step': 14892, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:19.331679', 'step': 14892, 'epoch': 2} {'type': 'loss', 'content': 0.07705312222242355, 'timestamp': '2025-10-01 04:31:19.334326', 'step': 14893, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:31:19.366010', 'step': 14893, 'epoch': 2} {'type': 'loss', 'content': 0.04451965168118477, 'timestamp': '2025-10-01 04:31:19.368903', 'step': 14894, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:19.401664', 'step': 14894, 'epoch': 2} {'type': 'loss', 'content': 0.09980840981006622, 'timestamp': '2025-10-01 04:31:19.404751', 'step': 14895, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:19.436211', 'step': 14895, 'epoch': 2} {'type': 'loss', 'content': 0.09922366589307785, 'timestamp': '2025-10-01 04:31:19.460455', 'step': 14896, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:19.491306', 'step': 14896, 'epoch': 2} {'type': 'loss', 'content': 0.11567623168230057, 'timestamp': '2025-10-01 04:31:19.493662', 'step': 14897, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:19.526797', 'step': 14897, 'epoch': 2} {'type': 'loss', 'content': 0.061021167784929276, 'timestamp': '2025-10-01 04:31:19.529055', 'step': 14898, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:19.562880', 'step': 14898, 'epoch': 2} {'type': 'loss', 'content': 0.10369378328323364, 'timestamp': '2025-10-01 04:31:19.565300', 'step': 14899, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:19.601031', 'step': 14899, 'epoch': 2} {'type': 'loss', 'content': 0.14731962978839874, 'timestamp': '2025-10-01 04:31:19.624922', 'step': 14900, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:19.659424', 'step': 14900, 'epoch': 2} {'type': 'loss', 'content': 0.08837675303220749, 'timestamp': '2025-10-01 04:31:19.661517', 'step': 14901, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:19.693807', 'step': 14901, 'epoch': 2} {'type': 'loss', 'content': 0.10034186393022537, 'timestamp': '2025-10-01 04:31:19.696083', 'step': 14902, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-10-01 04:31:19.729253', 'step': 14902, 'epoch': 2} {'type': 'loss', 'content': 0.1337762326002121, 'timestamp': '2025-10-01 04:31:19.733709', 'step': 14903, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:19.768839', 'step': 14903, 'epoch': 2} {'type': 'loss', 'content': 0.04065721482038498, 'timestamp': '2025-10-01 04:31:19.792571', 'step': 14904, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:19.823046', 'step': 14904, 'epoch': 2} {'type': 'loss', 'content': 0.0707937553524971, 'timestamp': '2025-10-01 04:31:19.825231', 'step': 14905, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:19.855622', 'step': 14905, 'epoch': 2} {'type': 'loss', 'content': 0.0805409625172615, 'timestamp': '2025-10-01 04:31:19.857880', 'step': 14906, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:19.893529', 'step': 14906, 'epoch': 2} {'type': 'loss', 'content': 0.1565866619348526, 'timestamp': '2025-10-01 04:31:19.895844', 'step': 14907, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:19.927046', 'step': 14907, 'epoch': 2} {'type': 'loss', 'content': 0.1429774910211563, 'timestamp': '2025-10-01 04:31:19.950735', 'step': 14908, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:31:19.980992', 'step': 14908, 'epoch': 2} {'type': 'loss', 'content': 0.09743636101484299, 'timestamp': '2025-10-01 04:31:19.983334', 'step': 14909, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:20.013891', 'step': 14909, 'epoch': 2} {'type': 'loss', 'content': 0.15380613505840302, 'timestamp': '2025-10-01 04:31:20.016194', 'step': 14910, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:20.046842', 'step': 14910, 'epoch': 2} {'type': 'loss', 'content': 0.10385320335626602, 'timestamp': '2025-10-01 04:31:20.049080', 'step': 14911, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:20.089365', 'step': 14911, 'epoch': 2} {'type': 'loss', 'content': 0.04719836637377739, 'timestamp': '2025-10-01 04:31:20.113019', 'step': 14912, 'epoch': 2} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 949202279808}], 'timestamp': '2025-10-01 04:31:28.845681', 'step': 14912, 'epoch': 2} {'type': 'pplx', 'content': 10950.651047140702, 'timestamp': '2025-10-01 04:31:28.848745', 'step': 14912, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:28.877447', 'step': 14912, 'epoch': 2} {'type': 'loss', 'content': 0.08127995580434799, 'timestamp': '2025-10-01 04:31:28.880981', 'step': 14913, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:28.911930', 'step': 14913, 'epoch': 2} {'type': 'loss', 'content': 0.03879270702600479, 'timestamp': '2025-10-01 04:31:28.913949', 'step': 14914, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:31:28.943773', 'step': 14914, 'epoch': 2} {'type': 'loss', 'content': 0.2921725809574127, 'timestamp': '2025-10-01 04:31:28.946201', 'step': 14915, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:28.976056', 'step': 14915, 'epoch': 2} {'type': 'loss', 'content': 0.08395586162805557, 'timestamp': '2025-10-01 04:31:28.999851', 'step': 14916, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:29.039755', 'step': 14916, 'epoch': 2} {'type': 'loss', 'content': 0.08450541645288467, 'timestamp': '2025-10-01 04:31:29.041965', 'step': 14917, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:29.072030', 'step': 14917, 'epoch': 2} {'type': 'loss', 'content': 0.06021988391876221, 'timestamp': '2025-10-01 04:31:29.074022', 'step': 14918, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:29.106073', 'step': 14918, 'epoch': 2} {'type': 'loss', 'content': 0.07063443213701248, 'timestamp': '2025-10-01 04:31:29.108298', 'step': 14919, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [1, 208], 'flops': 1542724875376}, 'timestamp': '2025-10-01 04:31:29.147317', 'step': 14919, 'epoch': 2} {'type': 'loss', 'content': 0.2645031213760376, 'timestamp': '2025-10-01 04:31:29.170786', 'step': 14920, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:29.213909', 'step': 14920, 'epoch': 3} {'type': 'loss', 'content': 0.057146359235048294, 'timestamp': '2025-10-01 04:31:29.215910', 'step': 14921, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:29.248199', 'step': 14921, 'epoch': 3} {'type': 'loss', 'content': 0.07561507076025009, 'timestamp': '2025-10-01 04:31:29.250118', 'step': 14922, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:29.280956', 'step': 14922, 'epoch': 3} {'type': 'loss', 'content': 0.07412122935056686, 'timestamp': '2025-10-01 04:31:29.282888', 'step': 14923, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:29.314905', 'step': 14923, 'epoch': 3} {'type': 'loss', 'content': 0.11303188651800156, 'timestamp': '2025-10-01 04:31:29.338322', 'step': 14924, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:29.372081', 'step': 14924, 'epoch': 3} {'type': 'loss', 'content': 0.14288707077503204, 'timestamp': '2025-10-01 04:31:29.374567', 'step': 14925, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:29.406531', 'step': 14925, 'epoch': 3} {'type': 'loss', 'content': 0.051535606384277344, 'timestamp': '2025-10-01 04:31:29.410159', 'step': 14926, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:29.440739', 'step': 14926, 'epoch': 3} {'type': 'loss', 'content': 0.11561881750822067, 'timestamp': '2025-10-01 04:31:29.443017', 'step': 14927, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:29.473461', 'step': 14927, 'epoch': 3} {'type': 'loss', 'content': 0.03765549883246422, 'timestamp': '2025-10-01 04:31:29.497059', 'step': 14928, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:29.527843', 'step': 14928, 'epoch': 3} {'type': 'loss', 'content': 0.04470265656709671, 'timestamp': '2025-10-01 04:31:29.530766', 'step': 14929, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:29.562656', 'step': 14929, 'epoch': 3} {'type': 'loss', 'content': 0.07208163291215897, 'timestamp': '2025-10-01 04:31:29.564910', 'step': 14930, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:29.595638', 'step': 14930, 'epoch': 3} {'type': 'loss', 'content': 0.06861153244972229, 'timestamp': '2025-10-01 04:31:29.597857', 'step': 14931, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:29.628138', 'step': 14931, 'epoch': 3} {'type': 'loss', 'content': 0.1477992683649063, 'timestamp': '2025-10-01 04:31:29.652231', 'step': 14932, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:29.683300', 'step': 14932, 'epoch': 3} {'type': 'loss', 'content': 0.07418093830347061, 'timestamp': '2025-10-01 04:31:29.685209', 'step': 14933, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:29.715561', 'step': 14933, 'epoch': 3} {'type': 'loss', 'content': 0.07009133696556091, 'timestamp': '2025-10-01 04:31:29.717557', 'step': 14934, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:29.747542', 'step': 14934, 'epoch': 3} {'type': 'loss', 'content': 0.09889554977416992, 'timestamp': '2025-10-01 04:31:29.749385', 'step': 14935, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:29.779390', 'step': 14935, 'epoch': 3} {'type': 'loss', 'content': 0.14775486290454865, 'timestamp': '2025-10-01 04:31:29.803071', 'step': 14936, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:29.833887', 'step': 14936, 'epoch': 3} {'type': 'loss', 'content': 0.03374502807855606, 'timestamp': '2025-10-01 04:31:29.835726', 'step': 14937, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:29.865695', 'step': 14937, 'epoch': 3} {'type': 'loss', 'content': 0.07875161617994308, 'timestamp': '2025-10-01 04:31:29.868180', 'step': 14938, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:31:29.899667', 'step': 14938, 'epoch': 3} {'type': 'loss', 'content': 0.13528786599636078, 'timestamp': '2025-10-01 04:31:29.902076', 'step': 14939, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:29.933946', 'step': 14939, 'epoch': 3} {'type': 'loss', 'content': 0.06813044100999832, 'timestamp': '2025-10-01 04:31:29.964552', 'step': 14940, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:29.994308', 'step': 14940, 'epoch': 3} {'type': 'loss', 'content': 0.038770075887441635, 'timestamp': '2025-10-01 04:31:29.996744', 'step': 14941, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:30.036115', 'step': 14941, 'epoch': 3} {'type': 'loss', 'content': 0.10970079898834229, 'timestamp': '2025-10-01 04:31:30.040127', 'step': 14942, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:30.072909', 'step': 14942, 'epoch': 3} {'type': 'loss', 'content': 0.034692008048295975, 'timestamp': '2025-10-01 04:31:30.075817', 'step': 14943, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:30.107340', 'step': 14943, 'epoch': 3} {'type': 'loss', 'content': 0.10066092759370804, 'timestamp': '2025-10-01 04:31:30.131116', 'step': 14944, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:30.161256', 'step': 14944, 'epoch': 3} {'type': 'loss', 'content': 0.026910779997706413, 'timestamp': '2025-10-01 04:31:30.163363', 'step': 14945, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:31:30.194355', 'step': 14945, 'epoch': 3} {'type': 'loss', 'content': 0.11416473239660263, 'timestamp': '2025-10-01 04:31:30.197097', 'step': 14946, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:30.227626', 'step': 14946, 'epoch': 3} {'type': 'loss', 'content': 0.13163840770721436, 'timestamp': '2025-10-01 04:31:30.229527', 'step': 14947, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:30.267865', 'step': 14947, 'epoch': 3} {'type': 'loss', 'content': 0.08495461195707321, 'timestamp': '2025-10-01 04:31:30.291268', 'step': 14948, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:30.322001', 'step': 14948, 'epoch': 3} {'type': 'loss', 'content': 0.060049235820770264, 'timestamp': '2025-10-01 04:31:30.324067', 'step': 14949, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:30.355188', 'step': 14949, 'epoch': 3} {'type': 'loss', 'content': 0.03800133243203163, 'timestamp': '2025-10-01 04:31:30.358607', 'step': 14950, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:30.389598', 'step': 14950, 'epoch': 3} {'type': 'loss', 'content': 0.08830989897251129, 'timestamp': '2025-10-01 04:31:30.392011', 'step': 14951, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:30.427371', 'step': 14951, 'epoch': 3} {'type': 'loss', 'content': 0.04189816862344742, 'timestamp': '2025-10-01 04:31:30.451298', 'step': 14952, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:30.483601', 'step': 14952, 'epoch': 3} {'type': 'loss', 'content': 0.08777479082345963, 'timestamp': '2025-10-01 04:31:30.486715', 'step': 14953, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:30.519205', 'step': 14953, 'epoch': 3} {'type': 'loss', 'content': 0.07703925669193268, 'timestamp': '2025-10-01 04:31:30.522034', 'step': 14954, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:30.556018', 'step': 14954, 'epoch': 3} {'type': 'loss', 'content': 0.13023820519447327, 'timestamp': '2025-10-01 04:31:30.558286', 'step': 14955, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:30.591925', 'step': 14955, 'epoch': 3} {'type': 'loss', 'content': 0.04119165614247322, 'timestamp': '2025-10-01 04:31:30.615771', 'step': 14956, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:30.647821', 'step': 14956, 'epoch': 3} {'type': 'loss', 'content': 0.043072789907455444, 'timestamp': '2025-10-01 04:31:30.650253', 'step': 14957, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:30.683755', 'step': 14957, 'epoch': 3} {'type': 'loss', 'content': 0.07920965552330017, 'timestamp': '2025-10-01 04:31:30.686021', 'step': 14958, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:30.719043', 'step': 14958, 'epoch': 3} {'type': 'loss', 'content': 0.10152596235275269, 'timestamp': '2025-10-01 04:31:30.721455', 'step': 14959, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:30.753050', 'step': 14959, 'epoch': 3} {'type': 'loss', 'content': 0.0502074658870697, 'timestamp': '2025-10-01 04:31:30.776994', 'step': 14960, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:30.814392', 'step': 14960, 'epoch': 3} {'type': 'loss', 'content': 0.034405600279569626, 'timestamp': '2025-10-01 04:31:30.816581', 'step': 14961, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:30.848436', 'step': 14961, 'epoch': 3} {'type': 'loss', 'content': 0.09530556946992874, 'timestamp': '2025-10-01 04:31:30.850612', 'step': 14962, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:30.882062', 'step': 14962, 'epoch': 3} {'type': 'loss', 'content': 0.08614698797464371, 'timestamp': '2025-10-01 04:31:30.884148', 'step': 14963, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:30.914284', 'step': 14963, 'epoch': 3} {'type': 'loss', 'content': 0.08378751575946808, 'timestamp': '2025-10-01 04:31:30.937904', 'step': 14964, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:30.969044', 'step': 14964, 'epoch': 3} {'type': 'loss', 'content': 0.10604707896709442, 'timestamp': '2025-10-01 04:31:30.971461', 'step': 14965, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:31.004718', 'step': 14965, 'epoch': 3} {'type': 'loss', 'content': 0.0466427356004715, 'timestamp': '2025-10-01 04:31:31.007242', 'step': 14966, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:31:31.038966', 'step': 14966, 'epoch': 3} {'type': 'loss', 'content': 0.08962380886077881, 'timestamp': '2025-10-01 04:31:31.041621', 'step': 14967, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:31.072996', 'step': 14967, 'epoch': 3} {'type': 'loss', 'content': 0.09745863080024719, 'timestamp': '2025-10-01 04:31:31.096892', 'step': 14968, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:31.129881', 'step': 14968, 'epoch': 3} {'type': 'loss', 'content': 0.11475030332803726, 'timestamp': '2025-10-01 04:31:31.132176', 'step': 14969, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:31.163116', 'step': 14969, 'epoch': 3} {'type': 'loss', 'content': 0.06918440759181976, 'timestamp': '2025-10-01 04:31:31.176424', 'step': 14970, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:31.219542', 'step': 14970, 'epoch': 3} {'type': 'loss', 'content': 0.08784525841474533, 'timestamp': '2025-10-01 04:31:31.221688', 'step': 14971, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:31.262590', 'step': 14971, 'epoch': 3} {'type': 'loss', 'content': 0.08138929307460785, 'timestamp': '2025-10-01 04:31:31.286313', 'step': 14972, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:31.318936', 'step': 14972, 'epoch': 3} {'type': 'loss', 'content': 0.05355583131313324, 'timestamp': '2025-10-01 04:31:31.322877', 'step': 14973, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:31:31.365925', 'step': 14973, 'epoch': 3} {'type': 'loss', 'content': 0.09261690825223923, 'timestamp': '2025-10-01 04:31:31.373098', 'step': 14974, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:31:31.424706', 'step': 14974, 'epoch': 3} {'type': 'loss', 'content': 0.05994587764143944, 'timestamp': '2025-10-01 04:31:31.427165', 'step': 14975, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:31:31.470284', 'step': 14975, 'epoch': 3} {'type': 'loss', 'content': 0.05162390321493149, 'timestamp': '2025-10-01 04:31:31.494463', 'step': 14976, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:31.533947', 'step': 14976, 'epoch': 3} {'type': 'loss', 'content': 0.08589301258325577, 'timestamp': '2025-10-01 04:31:31.536277', 'step': 14977, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:31.580852', 'step': 14977, 'epoch': 3} {'type': 'loss', 'content': 0.08349710702896118, 'timestamp': '2025-10-01 04:31:31.583194', 'step': 14978, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:31.618060', 'step': 14978, 'epoch': 3} {'type': 'loss', 'content': 0.10946547240018845, 'timestamp': '2025-10-01 04:31:31.620265', 'step': 14979, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:31:31.655434', 'step': 14979, 'epoch': 3} {'type': 'loss', 'content': 0.06144345551729202, 'timestamp': '2025-10-01 04:31:31.679342', 'step': 14980, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:31.714843', 'step': 14980, 'epoch': 3} {'type': 'loss', 'content': 0.08093753457069397, 'timestamp': '2025-10-01 04:31:31.717455', 'step': 14981, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:31.752551', 'step': 14981, 'epoch': 3} {'type': 'loss', 'content': 0.05404289439320564, 'timestamp': '2025-10-01 04:31:31.755086', 'step': 14982, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:31.807957', 'step': 14982, 'epoch': 3} {'type': 'loss', 'content': 0.1672452837228775, 'timestamp': '2025-10-01 04:31:31.810568', 'step': 14983, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:31.859327', 'step': 14983, 'epoch': 3} {'type': 'loss', 'content': 0.060813143849372864, 'timestamp': '2025-10-01 04:31:31.883284', 'step': 14984, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:31.917668', 'step': 14984, 'epoch': 3} {'type': 'loss', 'content': 0.0394449457526207, 'timestamp': '2025-10-01 04:31:31.919869', 'step': 14985, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:31:31.951683', 'step': 14985, 'epoch': 3} {'type': 'loss', 'content': 0.09346329420804977, 'timestamp': '2025-10-01 04:31:31.954650', 'step': 14986, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:31:32.012001', 'step': 14986, 'epoch': 3} {'type': 'loss', 'content': 0.030479392036795616, 'timestamp': '2025-10-01 04:31:32.014417', 'step': 14987, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:32.046859', 'step': 14987, 'epoch': 3} {'type': 'loss', 'content': 0.08600465208292007, 'timestamp': '2025-10-01 04:31:32.070712', 'step': 14988, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:32.102629', 'step': 14988, 'epoch': 3} {'type': 'loss', 'content': 0.13720519840717316, 'timestamp': '2025-10-01 04:31:32.104993', 'step': 14989, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:32.156316', 'step': 14989, 'epoch': 3} {'type': 'loss', 'content': 0.09066397696733475, 'timestamp': '2025-10-01 04:31:32.158780', 'step': 14990, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:32.195855', 'step': 14990, 'epoch': 3} {'type': 'loss', 'content': 0.07319262623786926, 'timestamp': '2025-10-01 04:31:32.199796', 'step': 14991, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:32.250677', 'step': 14991, 'epoch': 3} {'type': 'loss', 'content': 0.059069305658340454, 'timestamp': '2025-10-01 04:31:32.277644', 'step': 14992, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:32.337329', 'step': 14992, 'epoch': 3} {'type': 'loss', 'content': 0.08051525801420212, 'timestamp': '2025-10-01 04:31:32.339649', 'step': 14993, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:32.390994', 'step': 14993, 'epoch': 3} {'type': 'loss', 'content': 0.04855445772409439, 'timestamp': '2025-10-01 04:31:32.393424', 'step': 14994, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:32.430281', 'step': 14994, 'epoch': 3} {'type': 'loss', 'content': 0.07722236961126328, 'timestamp': '2025-10-01 04:31:32.433717', 'step': 14995, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:32.468729', 'step': 14995, 'epoch': 3} {'type': 'loss', 'content': 0.08965902775526047, 'timestamp': '2025-10-01 04:31:32.492624', 'step': 14996, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:32.533328', 'step': 14996, 'epoch': 3} {'type': 'loss', 'content': 0.05550369992852211, 'timestamp': '2025-10-01 04:31:32.535586', 'step': 14997, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:32.571120', 'step': 14997, 'epoch': 3} {'type': 'loss', 'content': 0.055601708590984344, 'timestamp': '2025-10-01 04:31:32.573469', 'step': 14998, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:32.603623', 'step': 14998, 'epoch': 3} {'type': 'loss', 'content': 0.042200490832328796, 'timestamp': '2025-10-01 04:31:32.606050', 'step': 14999, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:32.638554', 'step': 14999, 'epoch': 3} {'type': 'loss', 'content': 0.07080407440662384, 'timestamp': '2025-10-01 04:31:32.662370', 'step': 15000, 'epoch': 3} {'type': 'info', 'content': 'Checkpoint saved at step 15000', 'timestamp': '2025-10-01 04:31:38.052001', 'step': 15000, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:31:38.083652', 'step': 15000, 'epoch': 3} {'type': 'loss', 'content': 0.0800948441028595, 'timestamp': '2025-10-01 04:31:38.086011', 'step': 15001, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:38.117792', 'step': 15001, 'epoch': 3} {'type': 'loss', 'content': 0.0974053367972374, 'timestamp': '2025-10-01 04:31:38.120102', 'step': 15002, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:38.150679', 'step': 15002, 'epoch': 3} {'type': 'loss', 'content': 0.10163627564907074, 'timestamp': '2025-10-01 04:31:38.157986', 'step': 15003, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:38.187939', 'step': 15003, 'epoch': 3} {'type': 'loss', 'content': 0.09595244377851486, 'timestamp': '2025-10-01 04:31:38.212853', 'step': 15004, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:38.249680', 'step': 15004, 'epoch': 3} {'type': 'loss', 'content': 0.05407394841313362, 'timestamp': '2025-10-01 04:31:38.252058', 'step': 15005, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:38.282714', 'step': 15005, 'epoch': 3} {'type': 'loss', 'content': 0.04625077173113823, 'timestamp': '2025-10-01 04:31:38.284772', 'step': 15006, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:38.315325', 'step': 15006, 'epoch': 3} {'type': 'loss', 'content': 0.06538500636816025, 'timestamp': '2025-10-01 04:31:38.317724', 'step': 15007, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:38.348110', 'step': 15007, 'epoch': 3} {'type': 'loss', 'content': 0.049747057259082794, 'timestamp': '2025-10-01 04:31:38.371963', 'step': 15008, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:38.402407', 'step': 15008, 'epoch': 3} {'type': 'loss', 'content': 0.06502742320299149, 'timestamp': '2025-10-01 04:31:38.404886', 'step': 15009, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:38.434754', 'step': 15009, 'epoch': 3} {'type': 'loss', 'content': 0.11771614849567413, 'timestamp': '2025-10-01 04:31:38.437350', 'step': 15010, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:38.468021', 'step': 15010, 'epoch': 3} {'type': 'loss', 'content': 0.07004625350236893, 'timestamp': '2025-10-01 04:31:38.486737', 'step': 15011, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:38.517231', 'step': 15011, 'epoch': 3} {'type': 'loss', 'content': 0.05567488446831703, 'timestamp': '2025-10-01 04:31:38.540989', 'step': 15012, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:38.571527', 'step': 15012, 'epoch': 3} {'type': 'loss', 'content': 0.09532585740089417, 'timestamp': '2025-10-01 04:31:38.574216', 'step': 15013, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:31:38.604642', 'step': 15013, 'epoch': 3} {'type': 'loss', 'content': 0.07664725929498672, 'timestamp': '2025-10-01 04:31:38.613027', 'step': 15014, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:38.643242', 'step': 15014, 'epoch': 3} {'type': 'loss', 'content': 0.05151968449354172, 'timestamp': '2025-10-01 04:31:38.645672', 'step': 15015, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:38.675681', 'step': 15015, 'epoch': 3} {'type': 'loss', 'content': 0.08134854584932327, 'timestamp': '2025-10-01 04:31:38.699492', 'step': 15016, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:38.731227', 'step': 15016, 'epoch': 3} {'type': 'loss', 'content': 0.08650298416614532, 'timestamp': '2025-10-01 04:31:38.734157', 'step': 15017, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:38.764301', 'step': 15017, 'epoch': 3} {'type': 'loss', 'content': 0.09062351286411285, 'timestamp': '2025-10-01 04:31:38.766646', 'step': 15018, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:38.803955', 'step': 15018, 'epoch': 3} {'type': 'loss', 'content': 0.08540833741426468, 'timestamp': '2025-10-01 04:31:38.806280', 'step': 15019, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:38.836282', 'step': 15019, 'epoch': 3} {'type': 'loss', 'content': 0.08091583102941513, 'timestamp': '2025-10-01 04:31:38.860720', 'step': 15020, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:38.893335', 'step': 15020, 'epoch': 3} {'type': 'loss', 'content': 0.17663387954235077, 'timestamp': '2025-10-01 04:31:38.895600', 'step': 15021, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:38.936618', 'step': 15021, 'epoch': 3} {'type': 'loss', 'content': 0.046066273003816605, 'timestamp': '2025-10-01 04:31:38.943144', 'step': 15022, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:38.974569', 'step': 15022, 'epoch': 3} {'type': 'loss', 'content': 0.08010756969451904, 'timestamp': '2025-10-01 04:31:38.977408', 'step': 15023, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:39.008370', 'step': 15023, 'epoch': 3} {'type': 'loss', 'content': 0.1465877741575241, 'timestamp': '2025-10-01 04:31:39.032144', 'step': 15024, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:39.062385', 'step': 15024, 'epoch': 3} {'type': 'loss', 'content': 0.12430287897586823, 'timestamp': '2025-10-01 04:31:39.064665', 'step': 15025, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:39.094839', 'step': 15025, 'epoch': 3} {'type': 'loss', 'content': 0.055262066423892975, 'timestamp': '2025-10-01 04:31:39.097168', 'step': 15026, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:39.128133', 'step': 15026, 'epoch': 3} {'type': 'loss', 'content': 0.07508339732885361, 'timestamp': '2025-10-01 04:31:39.130218', 'step': 15027, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:39.160299', 'step': 15027, 'epoch': 3} {'type': 'loss', 'content': 0.10554493963718414, 'timestamp': '2025-10-01 04:31:39.184245', 'step': 15028, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:31:39.214410', 'step': 15028, 'epoch': 3} {'type': 'loss', 'content': 0.07725661247968674, 'timestamp': '2025-10-01 04:31:39.216828', 'step': 15029, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:39.246637', 'step': 15029, 'epoch': 3} {'type': 'loss', 'content': 0.14067330956459045, 'timestamp': '2025-10-01 04:31:39.249080', 'step': 15030, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:39.278981', 'step': 15030, 'epoch': 3} {'type': 'loss', 'content': 0.07765797525644302, 'timestamp': '2025-10-01 04:31:39.282236', 'step': 15031, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:39.312566', 'step': 15031, 'epoch': 3} {'type': 'loss', 'content': 0.06492362916469574, 'timestamp': '2025-10-01 04:31:39.337121', 'step': 15032, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:39.367180', 'step': 15032, 'epoch': 3} {'type': 'loss', 'content': 0.12191122025251389, 'timestamp': '2025-10-01 04:31:39.369389', 'step': 15033, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:39.402987', 'step': 15033, 'epoch': 3} {'type': 'loss', 'content': 0.15902921557426453, 'timestamp': '2025-10-01 04:31:39.405607', 'step': 15034, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:31:39.436448', 'step': 15034, 'epoch': 3} {'type': 'loss', 'content': 0.07692313939332962, 'timestamp': '2025-10-01 04:31:39.438791', 'step': 15035, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:39.470429', 'step': 15035, 'epoch': 3} {'type': 'loss', 'content': 0.16942434012889862, 'timestamp': '2025-10-01 04:31:39.497301', 'step': 15036, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:39.528290', 'step': 15036, 'epoch': 3} {'type': 'loss', 'content': 0.11036785691976547, 'timestamp': '2025-10-01 04:31:39.530615', 'step': 15037, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:39.562810', 'step': 15037, 'epoch': 3} {'type': 'loss', 'content': 0.13947877287864685, 'timestamp': '2025-10-01 04:31:39.565293', 'step': 15038, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:31:39.602784', 'step': 15038, 'epoch': 3} {'type': 'loss', 'content': 0.08025682717561722, 'timestamp': '2025-10-01 04:31:39.605207', 'step': 15039, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:39.637239', 'step': 15039, 'epoch': 3} {'type': 'loss', 'content': 0.18641147017478943, 'timestamp': '2025-10-01 04:31:39.660969', 'step': 15040, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:39.695191', 'step': 15040, 'epoch': 3} {'type': 'loss', 'content': 0.1480245292186737, 'timestamp': '2025-10-01 04:31:39.697390', 'step': 15041, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:31:39.729196', 'step': 15041, 'epoch': 3} {'type': 'loss', 'content': 0.07241502404212952, 'timestamp': '2025-10-01 04:31:39.732772', 'step': 15042, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:31:39.762861', 'step': 15042, 'epoch': 3} {'type': 'loss', 'content': 0.12491049617528915, 'timestamp': '2025-10-01 04:31:39.765296', 'step': 15043, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:39.796643', 'step': 15043, 'epoch': 3} {'type': 'loss', 'content': 0.09617611020803452, 'timestamp': '2025-10-01 04:31:39.820349', 'step': 15044, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:39.852105', 'step': 15044, 'epoch': 3} {'type': 'loss', 'content': 0.011593534611165524, 'timestamp': '2025-10-01 04:31:39.854338', 'step': 15045, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:39.886798', 'step': 15045, 'epoch': 3} {'type': 'loss', 'content': 0.03325709328055382, 'timestamp': '2025-10-01 04:31:39.889109', 'step': 15046, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:39.923913', 'step': 15046, 'epoch': 3} {'type': 'loss', 'content': 0.1023312509059906, 'timestamp': '2025-10-01 04:31:39.926102', 'step': 15047, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:39.956562', 'step': 15047, 'epoch': 3} {'type': 'loss', 'content': 0.20615524053573608, 'timestamp': '2025-10-01 04:31:39.980143', 'step': 15048, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:40.013393', 'step': 15048, 'epoch': 3} {'type': 'loss', 'content': 0.12628522515296936, 'timestamp': '2025-10-01 04:31:40.015528', 'step': 15049, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:40.046230', 'step': 15049, 'epoch': 3} {'type': 'loss', 'content': 0.10120322555303574, 'timestamp': '2025-10-01 04:31:40.048619', 'step': 15050, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:40.092192', 'step': 15050, 'epoch': 3} {'type': 'loss', 'content': 0.07733094692230225, 'timestamp': '2025-10-01 04:31:40.094393', 'step': 15051, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:40.129311', 'step': 15051, 'epoch': 3} {'type': 'loss', 'content': 0.0588233657181263, 'timestamp': '2025-10-01 04:31:40.153006', 'step': 15052, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:40.185872', 'step': 15052, 'epoch': 3} {'type': 'loss', 'content': 0.1545589566230774, 'timestamp': '2025-10-01 04:31:40.187920', 'step': 15053, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:40.218370', 'step': 15053, 'epoch': 3} {'type': 'loss', 'content': 0.14971224963665009, 'timestamp': '2025-10-01 04:31:40.220539', 'step': 15054, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:40.252446', 'step': 15054, 'epoch': 3} {'type': 'loss', 'content': 0.046097490936517715, 'timestamp': '2025-10-01 04:31:40.254615', 'step': 15055, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:40.286430', 'step': 15055, 'epoch': 3} {'type': 'loss', 'content': 0.09393776953220367, 'timestamp': '2025-10-01 04:31:40.309984', 'step': 15056, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:40.341680', 'step': 15056, 'epoch': 3} {'type': 'loss', 'content': 0.08239153027534485, 'timestamp': '2025-10-01 04:31:40.343874', 'step': 15057, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:40.375838', 'step': 15057, 'epoch': 3} {'type': 'loss', 'content': 0.0847819373011589, 'timestamp': '2025-10-01 04:31:40.378108', 'step': 15058, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:40.414961', 'step': 15058, 'epoch': 3} {'type': 'loss', 'content': 0.09302213042974472, 'timestamp': '2025-10-01 04:31:40.417200', 'step': 15059, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:31:40.446908', 'step': 15059, 'epoch': 3} {'type': 'loss', 'content': 0.09183722734451294, 'timestamp': '2025-10-01 04:31:40.470538', 'step': 15060, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:40.505629', 'step': 15060, 'epoch': 3} {'type': 'loss', 'content': 0.14471550285816193, 'timestamp': '2025-10-01 04:31:40.508056', 'step': 15061, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:40.537511', 'step': 15061, 'epoch': 3} {'type': 'loss', 'content': 0.07261083275079727, 'timestamp': '2025-10-01 04:31:40.539656', 'step': 15062, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:40.569193', 'step': 15062, 'epoch': 3} {'type': 'loss', 'content': 0.13895121216773987, 'timestamp': '2025-10-01 04:31:40.571222', 'step': 15063, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:40.604263', 'step': 15063, 'epoch': 3} {'type': 'loss', 'content': 0.10975133627653122, 'timestamp': '2025-10-01 04:31:40.627917', 'step': 15064, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:40.659083', 'step': 15064, 'epoch': 3} {'type': 'loss', 'content': 0.1281544268131256, 'timestamp': '2025-10-01 04:31:40.661297', 'step': 15065, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:40.692812', 'step': 15065, 'epoch': 3} {'type': 'loss', 'content': 0.16814014315605164, 'timestamp': '2025-10-01 04:31:40.695062', 'step': 15066, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:40.726626', 'step': 15066, 'epoch': 3} {'type': 'loss', 'content': 0.10477431863546371, 'timestamp': '2025-10-01 04:31:40.729351', 'step': 15067, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:40.759170', 'step': 15067, 'epoch': 3} {'type': 'loss', 'content': 0.05974353477358818, 'timestamp': '2025-10-01 04:31:40.783007', 'step': 15068, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:31:40.816745', 'step': 15068, 'epoch': 3} {'type': 'loss', 'content': 0.028800705447793007, 'timestamp': '2025-10-01 04:31:40.819587', 'step': 15069, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:40.849789', 'step': 15069, 'epoch': 3} {'type': 'loss', 'content': 0.10177291184663773, 'timestamp': '2025-10-01 04:31:40.853342', 'step': 15070, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:31:40.893849', 'step': 15070, 'epoch': 3} {'type': 'loss', 'content': 0.0843803882598877, 'timestamp': '2025-10-01 04:31:40.896016', 'step': 15071, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:31:40.934066', 'step': 15071, 'epoch': 3} {'type': 'loss', 'content': 0.0566200315952301, 'timestamp': '2025-10-01 04:31:40.957723', 'step': 15072, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:40.988721', 'step': 15072, 'epoch': 3} {'type': 'loss', 'content': 0.0904151052236557, 'timestamp': '2025-10-01 04:31:40.990924', 'step': 15073, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:41.022143', 'step': 15073, 'epoch': 3} {'type': 'loss', 'content': 0.14438433945178986, 'timestamp': '2025-10-01 04:31:41.024666', 'step': 15074, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:31:41.057190', 'step': 15074, 'epoch': 3} {'type': 'loss', 'content': 0.22487547993659973, 'timestamp': '2025-10-01 04:31:41.061489', 'step': 15075, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:41.097540', 'step': 15075, 'epoch': 3} {'type': 'loss', 'content': 0.13039669394493103, 'timestamp': '2025-10-01 04:31:41.121633', 'step': 15076, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:41.157678', 'step': 15076, 'epoch': 3} {'type': 'loss', 'content': 0.05668734759092331, 'timestamp': '2025-10-01 04:31:41.162227', 'step': 15077, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:41.197907', 'step': 15077, 'epoch': 3} {'type': 'loss', 'content': 0.12932297587394714, 'timestamp': '2025-10-01 04:31:41.200071', 'step': 15078, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:41.242608', 'step': 15078, 'epoch': 3} {'type': 'loss', 'content': 0.09308458119630814, 'timestamp': '2025-10-01 04:31:41.245794', 'step': 15079, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:41.289169', 'step': 15079, 'epoch': 3} {'type': 'loss', 'content': 0.06817413121461868, 'timestamp': '2025-10-01 04:31:41.312853', 'step': 15080, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:31:41.348996', 'step': 15080, 'epoch': 3} {'type': 'loss', 'content': 0.1127520352602005, 'timestamp': '2025-10-01 04:31:41.353071', 'step': 15081, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:41.387056', 'step': 15081, 'epoch': 3} {'type': 'loss', 'content': 0.1295565664768219, 'timestamp': '2025-10-01 04:31:41.389603', 'step': 15082, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:41.422583', 'step': 15082, 'epoch': 3} {'type': 'loss', 'content': 0.07561774551868439, 'timestamp': '2025-10-01 04:31:41.425093', 'step': 15083, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:41.457507', 'step': 15083, 'epoch': 3} {'type': 'loss', 'content': 0.09264088422060013, 'timestamp': '2025-10-01 04:31:41.481441', 'step': 15084, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:41.515250', 'step': 15084, 'epoch': 3} {'type': 'loss', 'content': 0.12532222270965576, 'timestamp': '2025-10-01 04:31:41.517856', 'step': 15085, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:31:41.548044', 'step': 15085, 'epoch': 3} {'type': 'loss', 'content': 0.066167451441288, 'timestamp': '2025-10-01 04:31:41.550385', 'step': 15086, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:41.581455', 'step': 15086, 'epoch': 3} {'type': 'loss', 'content': 0.07130521535873413, 'timestamp': '2025-10-01 04:31:41.584507', 'step': 15087, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:41.615019', 'step': 15087, 'epoch': 3} {'type': 'loss', 'content': 0.056900180876255035, 'timestamp': '2025-10-01 04:31:41.639043', 'step': 15088, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:41.669675', 'step': 15088, 'epoch': 3} {'type': 'loss', 'content': 0.05079454556107521, 'timestamp': '2025-10-01 04:31:41.672169', 'step': 15089, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:41.702065', 'step': 15089, 'epoch': 3} {'type': 'loss', 'content': 0.09537126868963242, 'timestamp': '2025-10-01 04:31:41.704494', 'step': 15090, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:41.735021', 'step': 15090, 'epoch': 3} {'type': 'loss', 'content': 0.07954365015029907, 'timestamp': '2025-10-01 04:31:41.737522', 'step': 15091, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:41.768010', 'step': 15091, 'epoch': 3} {'type': 'loss', 'content': 0.1535084843635559, 'timestamp': '2025-10-01 04:31:41.791718', 'step': 15092, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:41.821579', 'step': 15092, 'epoch': 3} {'type': 'loss', 'content': 0.1610526293516159, 'timestamp': '2025-10-01 04:31:41.823724', 'step': 15093, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:41.855272', 'step': 15093, 'epoch': 3} {'type': 'loss', 'content': 0.08237429708242416, 'timestamp': '2025-10-01 04:31:41.857872', 'step': 15094, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:41.888097', 'step': 15094, 'epoch': 3} {'type': 'loss', 'content': 0.10115182399749756, 'timestamp': '2025-10-01 04:31:41.890690', 'step': 15095, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:41.924575', 'step': 15095, 'epoch': 3} {'type': 'loss', 'content': 0.14011341333389282, 'timestamp': '2025-10-01 04:31:41.948582', 'step': 15096, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:41.980240', 'step': 15096, 'epoch': 3} {'type': 'loss', 'content': 0.10754579305648804, 'timestamp': '2025-10-01 04:31:41.982586', 'step': 15097, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:42.013690', 'step': 15097, 'epoch': 3} {'type': 'loss', 'content': 0.0322757326066494, 'timestamp': '2025-10-01 04:31:42.016524', 'step': 15098, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:42.049036', 'step': 15098, 'epoch': 3} {'type': 'loss', 'content': 0.15007486939430237, 'timestamp': '2025-10-01 04:31:42.051948', 'step': 15099, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:42.083152', 'step': 15099, 'epoch': 3} {'type': 'loss', 'content': 0.061442676931619644, 'timestamp': '2025-10-01 04:31:42.107328', 'step': 15100, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:31:42.138239', 'step': 15100, 'epoch': 3} {'type': 'loss', 'content': 0.11838870495557785, 'timestamp': '2025-10-01 04:31:42.141079', 'step': 15101, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:42.173249', 'step': 15101, 'epoch': 3} {'type': 'loss', 'content': 0.041260696947574615, 'timestamp': '2025-10-01 04:31:42.175841', 'step': 15102, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:42.206611', 'step': 15102, 'epoch': 3} {'type': 'loss', 'content': 0.18104609847068787, 'timestamp': '2025-10-01 04:31:42.209102', 'step': 15103, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:42.239771', 'step': 15103, 'epoch': 3} {'type': 'loss', 'content': 0.0651218593120575, 'timestamp': '2025-10-01 04:31:42.264535', 'step': 15104, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:31:42.294681', 'step': 15104, 'epoch': 3} {'type': 'loss', 'content': 0.10402119159698486, 'timestamp': '2025-10-01 04:31:42.296671', 'step': 15105, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:42.327080', 'step': 15105, 'epoch': 3} {'type': 'loss', 'content': 0.08437913656234741, 'timestamp': '2025-10-01 04:31:42.329282', 'step': 15106, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:42.359856', 'step': 15106, 'epoch': 3} {'type': 'loss', 'content': 0.0868477001786232, 'timestamp': '2025-10-01 04:31:42.362118', 'step': 15107, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:42.391613', 'step': 15107, 'epoch': 3} {'type': 'loss', 'content': 0.09922381490468979, 'timestamp': '2025-10-01 04:31:42.415220', 'step': 15108, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:31:42.445361', 'step': 15108, 'epoch': 3} {'type': 'loss', 'content': 0.10961314290761948, 'timestamp': '2025-10-01 04:31:42.447519', 'step': 15109, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:31:42.478076', 'step': 15109, 'epoch': 3} {'type': 'loss', 'content': 0.03276504948735237, 'timestamp': '2025-10-01 04:31:42.480400', 'step': 15110, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:42.510961', 'step': 15110, 'epoch': 3} {'type': 'loss', 'content': 0.038991738110780716, 'timestamp': '2025-10-01 04:31:42.513355', 'step': 15111, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:42.543363', 'step': 15111, 'epoch': 3} {'type': 'loss', 'content': 0.08156533539295197, 'timestamp': '2025-10-01 04:31:42.567059', 'step': 15112, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:42.597203', 'step': 15112, 'epoch': 3} {'type': 'loss', 'content': 0.11736785620450974, 'timestamp': '2025-10-01 04:31:42.599366', 'step': 15113, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:42.629484', 'step': 15113, 'epoch': 3} {'type': 'loss', 'content': 0.11557450890541077, 'timestamp': '2025-10-01 04:31:42.631334', 'step': 15114, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:31:42.661109', 'step': 15114, 'epoch': 3} {'type': 'loss', 'content': 0.08325009793043137, 'timestamp': '2025-10-01 04:31:42.663377', 'step': 15115, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:42.694038', 'step': 15115, 'epoch': 3} {'type': 'loss', 'content': 0.04836956039071083, 'timestamp': '2025-10-01 04:31:42.717681', 'step': 15116, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:42.748889', 'step': 15116, 'epoch': 3} {'type': 'loss', 'content': 0.09394652396440506, 'timestamp': '2025-10-01 04:31:42.751122', 'step': 15117, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:42.781679', 'step': 15117, 'epoch': 3} {'type': 'loss', 'content': 0.08544740825891495, 'timestamp': '2025-10-01 04:31:42.783875', 'step': 15118, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:42.827217', 'step': 15118, 'epoch': 3} {'type': 'loss', 'content': 0.07088443636894226, 'timestamp': '2025-10-01 04:31:42.829570', 'step': 15119, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:31:42.859902', 'step': 15119, 'epoch': 3} {'type': 'loss', 'content': 0.1126495748758316, 'timestamp': '2025-10-01 04:31:42.883582', 'step': 15120, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:42.924379', 'step': 15120, 'epoch': 3} {'type': 'loss', 'content': 0.19165638089179993, 'timestamp': '2025-10-01 04:31:42.926562', 'step': 15121, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:42.957881', 'step': 15121, 'epoch': 3} {'type': 'loss', 'content': 0.056986358016729355, 'timestamp': '2025-10-01 04:31:42.960125', 'step': 15122, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:31:42.990483', 'step': 15122, 'epoch': 3} {'type': 'loss', 'content': 0.08185574412345886, 'timestamp': '2025-10-01 04:31:42.992660', 'step': 15123, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:43.022060', 'step': 15123, 'epoch': 3} {'type': 'loss', 'content': 0.08314982801675797, 'timestamp': '2025-10-01 04:31:43.045864', 'step': 15124, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:43.076820', 'step': 15124, 'epoch': 3} {'type': 'loss', 'content': 0.14323414862155914, 'timestamp': '2025-10-01 04:31:43.079140', 'step': 15125, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:43.108817', 'step': 15125, 'epoch': 3} {'type': 'loss', 'content': 0.06830715388059616, 'timestamp': '2025-10-01 04:31:43.111317', 'step': 15126, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:43.141204', 'step': 15126, 'epoch': 3} {'type': 'loss', 'content': 0.09888381510972977, 'timestamp': '2025-10-01 04:31:43.145493', 'step': 15127, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:43.175503', 'step': 15127, 'epoch': 3} {'type': 'loss', 'content': 0.10882189869880676, 'timestamp': '2025-10-01 04:31:43.199134', 'step': 15128, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:43.229351', 'step': 15128, 'epoch': 3} {'type': 'loss', 'content': 0.08679160475730896, 'timestamp': '2025-10-01 04:31:43.231473', 'step': 15129, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:43.260898', 'step': 15129, 'epoch': 3} {'type': 'loss', 'content': 0.25282734632492065, 'timestamp': '2025-10-01 04:31:43.263092', 'step': 15130, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:43.294821', 'step': 15130, 'epoch': 3} {'type': 'loss', 'content': 0.08688859641551971, 'timestamp': '2025-10-01 04:31:43.297088', 'step': 15131, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:43.326954', 'step': 15131, 'epoch': 3} {'type': 'loss', 'content': 0.11572475731372833, 'timestamp': '2025-10-01 04:31:43.350506', 'step': 15132, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:43.380758', 'step': 15132, 'epoch': 3} {'type': 'loss', 'content': 0.12597870826721191, 'timestamp': '2025-10-01 04:31:43.382653', 'step': 15133, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:43.412009', 'step': 15133, 'epoch': 3} {'type': 'loss', 'content': 0.16207069158554077, 'timestamp': '2025-10-01 04:31:43.414342', 'step': 15134, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:43.445327', 'step': 15134, 'epoch': 3} {'type': 'loss', 'content': 0.08853581547737122, 'timestamp': '2025-10-01 04:31:43.447700', 'step': 15135, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:43.478156', 'step': 15135, 'epoch': 3} {'type': 'loss', 'content': 0.15659841895103455, 'timestamp': '2025-10-01 04:31:43.501747', 'step': 15136, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:43.532195', 'step': 15136, 'epoch': 3} {'type': 'loss', 'content': 0.13118577003479004, 'timestamp': '2025-10-01 04:31:43.534467', 'step': 15137, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:43.564519', 'step': 15137, 'epoch': 3} {'type': 'loss', 'content': 0.08439264446496964, 'timestamp': '2025-10-01 04:31:43.566874', 'step': 15138, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:43.597926', 'step': 15138, 'epoch': 3} {'type': 'loss', 'content': 0.15038752555847168, 'timestamp': '2025-10-01 04:31:43.600314', 'step': 15139, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:43.630617', 'step': 15139, 'epoch': 3} {'type': 'loss', 'content': 0.10191276669502258, 'timestamp': '2025-10-01 04:31:43.654309', 'step': 15140, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:43.684296', 'step': 15140, 'epoch': 3} {'type': 'loss', 'content': 0.18739502131938934, 'timestamp': '2025-10-01 04:31:43.686329', 'step': 15141, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:43.719091', 'step': 15141, 'epoch': 3} {'type': 'loss', 'content': 0.07923872023820877, 'timestamp': '2025-10-01 04:31:43.721322', 'step': 15142, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:43.756183', 'step': 15142, 'epoch': 3} {'type': 'loss', 'content': 0.13115006685256958, 'timestamp': '2025-10-01 04:31:43.761316', 'step': 15143, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:43.792113', 'step': 15143, 'epoch': 3} {'type': 'loss', 'content': 0.0394270159304142, 'timestamp': '2025-10-01 04:31:43.815746', 'step': 15144, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:43.845819', 'step': 15144, 'epoch': 3} {'type': 'loss', 'content': 0.05991622060537338, 'timestamp': '2025-10-01 04:31:43.850679', 'step': 15145, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:43.880477', 'step': 15145, 'epoch': 3} {'type': 'loss', 'content': 0.09667395800352097, 'timestamp': '2025-10-01 04:31:43.883395', 'step': 15146, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:43.913684', 'step': 15146, 'epoch': 3} {'type': 'loss', 'content': 0.11015523225069046, 'timestamp': '2025-10-01 04:31:43.919819', 'step': 15147, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:43.958636', 'step': 15147, 'epoch': 3} {'type': 'loss', 'content': 0.04444395750761032, 'timestamp': '2025-10-01 04:31:43.982241', 'step': 15148, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:44.013200', 'step': 15148, 'epoch': 3} {'type': 'loss', 'content': 0.13367655873298645, 'timestamp': '2025-10-01 04:31:44.015411', 'step': 15149, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:44.046161', 'step': 15149, 'epoch': 3} {'type': 'loss', 'content': 0.04931017756462097, 'timestamp': '2025-10-01 04:31:44.048310', 'step': 15150, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:44.078582', 'step': 15150, 'epoch': 3} {'type': 'loss', 'content': 0.0607842318713665, 'timestamp': '2025-10-01 04:31:44.080592', 'step': 15151, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:44.112767', 'step': 15151, 'epoch': 3} {'type': 'loss', 'content': 0.1113123893737793, 'timestamp': '2025-10-01 04:31:44.136426', 'step': 15152, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:44.167411', 'step': 15152, 'epoch': 3} {'type': 'loss', 'content': 0.09826160222291946, 'timestamp': '2025-10-01 04:31:44.169633', 'step': 15153, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:44.199557', 'step': 15153, 'epoch': 3} {'type': 'loss', 'content': 0.08321768045425415, 'timestamp': '2025-10-01 04:31:44.201782', 'step': 15154, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:31:44.231896', 'step': 15154, 'epoch': 3} {'type': 'loss', 'content': 0.14629149436950684, 'timestamp': '2025-10-01 04:31:44.234128', 'step': 15155, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:31:44.264050', 'step': 15155, 'epoch': 3} {'type': 'loss', 'content': 0.05981266126036644, 'timestamp': '2025-10-01 04:31:44.287638', 'step': 15156, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:44.317824', 'step': 15156, 'epoch': 3} {'type': 'loss', 'content': 0.12835754454135895, 'timestamp': '2025-10-01 04:31:44.319910', 'step': 15157, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:44.349575', 'step': 15157, 'epoch': 3} {'type': 'loss', 'content': 0.14075343310832977, 'timestamp': '2025-10-01 04:31:44.351646', 'step': 15158, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:44.382592', 'step': 15158, 'epoch': 3} {'type': 'loss', 'content': 0.12222602218389511, 'timestamp': '2025-10-01 04:31:44.384912', 'step': 15159, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:31:44.415374', 'step': 15159, 'epoch': 3} {'type': 'loss', 'content': 0.03868970274925232, 'timestamp': '2025-10-01 04:31:44.439057', 'step': 15160, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:44.472403', 'step': 15160, 'epoch': 3} {'type': 'loss', 'content': 0.13062012195587158, 'timestamp': '2025-10-01 04:31:44.474607', 'step': 15161, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:44.504914', 'step': 15161, 'epoch': 3} {'type': 'loss', 'content': 0.029827650636434555, 'timestamp': '2025-10-01 04:31:44.507096', 'step': 15162, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:44.537023', 'step': 15162, 'epoch': 3} {'type': 'loss', 'content': 0.21941787004470825, 'timestamp': '2025-10-01 04:31:44.540350', 'step': 15163, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:44.571193', 'step': 15163, 'epoch': 3} {'type': 'loss', 'content': 0.09696958214044571, 'timestamp': '2025-10-01 04:31:44.594684', 'step': 15164, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:44.627507', 'step': 15164, 'epoch': 3} {'type': 'loss', 'content': 0.14726436138153076, 'timestamp': '2025-10-01 04:31:44.629570', 'step': 15165, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:44.659659', 'step': 15165, 'epoch': 3} {'type': 'loss', 'content': 0.14728830754756927, 'timestamp': '2025-10-01 04:31:44.661755', 'step': 15166, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:44.691706', 'step': 15166, 'epoch': 3} {'type': 'loss', 'content': 0.050375934690237045, 'timestamp': '2025-10-01 04:31:44.693748', 'step': 15167, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:44.724064', 'step': 15167, 'epoch': 3} {'type': 'loss', 'content': 0.08266142010688782, 'timestamp': '2025-10-01 04:31:44.747821', 'step': 15168, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:31:44.779885', 'step': 15168, 'epoch': 3} {'type': 'loss', 'content': 0.11510318517684937, 'timestamp': '2025-10-01 04:31:44.782867', 'step': 15169, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:44.821151', 'step': 15169, 'epoch': 3} {'type': 'loss', 'content': 0.09303673356771469, 'timestamp': '2025-10-01 04:31:44.823384', 'step': 15170, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:44.853617', 'step': 15170, 'epoch': 3} {'type': 'loss', 'content': 0.08021353930234909, 'timestamp': '2025-10-01 04:31:44.855665', 'step': 15171, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:44.885626', 'step': 15171, 'epoch': 3} {'type': 'loss', 'content': 0.06947790831327438, 'timestamp': '2025-10-01 04:31:44.909337', 'step': 15172, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:44.940118', 'step': 15172, 'epoch': 3} {'type': 'loss', 'content': 0.16926097869873047, 'timestamp': '2025-10-01 04:31:44.942329', 'step': 15173, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:44.973151', 'step': 15173, 'epoch': 3} {'type': 'loss', 'content': 0.0642995685338974, 'timestamp': '2025-10-01 04:31:44.975254', 'step': 15174, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:45.004706', 'step': 15174, 'epoch': 3} {'type': 'loss', 'content': 0.07570171356201172, 'timestamp': '2025-10-01 04:31:45.006835', 'step': 15175, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:45.037319', 'step': 15175, 'epoch': 3} {'type': 'loss', 'content': 0.026647068560123444, 'timestamp': '2025-10-01 04:31:45.061531', 'step': 15176, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:45.091708', 'step': 15176, 'epoch': 3} {'type': 'loss', 'content': 0.11378402262926102, 'timestamp': '2025-10-01 04:31:45.093768', 'step': 15177, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:45.124444', 'step': 15177, 'epoch': 3} {'type': 'loss', 'content': 0.09579958021640778, 'timestamp': '2025-10-01 04:31:45.126572', 'step': 15178, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:45.156623', 'step': 15178, 'epoch': 3} {'type': 'loss', 'content': 0.10253741592168808, 'timestamp': '2025-10-01 04:31:45.158663', 'step': 15179, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:45.188960', 'step': 15179, 'epoch': 3} {'type': 'loss', 'content': 0.1412888914346695, 'timestamp': '2025-10-01 04:31:45.212605', 'step': 15180, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:45.243637', 'step': 15180, 'epoch': 3} {'type': 'loss', 'content': 0.10420984029769897, 'timestamp': '2025-10-01 04:31:45.246039', 'step': 15181, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:45.275830', 'step': 15181, 'epoch': 3} {'type': 'loss', 'content': 0.05464334413409233, 'timestamp': '2025-10-01 04:31:45.278455', 'step': 15182, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:45.308627', 'step': 15182, 'epoch': 3} {'type': 'loss', 'content': 0.08468382060527802, 'timestamp': '2025-10-01 04:31:45.311011', 'step': 15183, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-10-01 04:31:45.342107', 'step': 15183, 'epoch': 3} {'type': 'loss', 'content': 0.04993851110339165, 'timestamp': '2025-10-01 04:31:45.369998', 'step': 15184, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:45.399714', 'step': 15184, 'epoch': 3} {'type': 'loss', 'content': 0.09586519002914429, 'timestamp': '2025-10-01 04:31:45.402761', 'step': 15185, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:45.433009', 'step': 15185, 'epoch': 3} {'type': 'loss', 'content': 0.057084109634160995, 'timestamp': '2025-10-01 04:31:45.449432', 'step': 15186, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:45.493483', 'step': 15186, 'epoch': 3} {'type': 'loss', 'content': 0.0837097316980362, 'timestamp': '2025-10-01 04:31:45.495731', 'step': 15187, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:45.526111', 'step': 15187, 'epoch': 3} {'type': 'loss', 'content': 0.06470417976379395, 'timestamp': '2025-10-01 04:31:45.549815', 'step': 15188, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:45.580105', 'step': 15188, 'epoch': 3} {'type': 'loss', 'content': 0.0704728439450264, 'timestamp': '2025-10-01 04:31:45.582868', 'step': 15189, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:45.613494', 'step': 15189, 'epoch': 3} {'type': 'loss', 'content': 0.04835136979818344, 'timestamp': '2025-10-01 04:31:45.615891', 'step': 15190, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:31:45.650625', 'step': 15190, 'epoch': 3} {'type': 'loss', 'content': 0.18013820052146912, 'timestamp': '2025-10-01 04:31:45.653106', 'step': 15191, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:45.688097', 'step': 15191, 'epoch': 3} {'type': 'loss', 'content': 0.0830051377415657, 'timestamp': '2025-10-01 04:31:45.712207', 'step': 15192, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:45.757137', 'step': 15192, 'epoch': 3} {'type': 'loss', 'content': 0.0784769356250763, 'timestamp': '2025-10-01 04:31:45.759924', 'step': 15193, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:45.791932', 'step': 15193, 'epoch': 3} {'type': 'loss', 'content': 0.045459210872650146, 'timestamp': '2025-10-01 04:31:45.794482', 'step': 15194, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:45.825199', 'step': 15194, 'epoch': 3} {'type': 'loss', 'content': 0.07472646236419678, 'timestamp': '2025-10-01 04:31:45.827434', 'step': 15195, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:45.858088', 'step': 15195, 'epoch': 3} {'type': 'loss', 'content': 0.07925156503915787, 'timestamp': '2025-10-01 04:31:45.881839', 'step': 15196, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:45.912396', 'step': 15196, 'epoch': 3} {'type': 'loss', 'content': 0.08248184621334076, 'timestamp': '2025-10-01 04:31:45.914822', 'step': 15197, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:45.944682', 'step': 15197, 'epoch': 3} {'type': 'loss', 'content': 0.08641568571329117, 'timestamp': '2025-10-01 04:31:45.948139', 'step': 15198, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:45.978786', 'step': 15198, 'epoch': 3} {'type': 'loss', 'content': 0.08887527137994766, 'timestamp': '2025-10-01 04:31:45.981140', 'step': 15199, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:46.011830', 'step': 15199, 'epoch': 3} {'type': 'loss', 'content': 0.07432983070611954, 'timestamp': '2025-10-01 04:31:46.035535', 'step': 15200, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:46.065080', 'step': 15200, 'epoch': 3} {'type': 'loss', 'content': 0.050740137696266174, 'timestamp': '2025-10-01 04:31:46.067265', 'step': 15201, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:46.097050', 'step': 15201, 'epoch': 3} {'type': 'loss', 'content': 0.0584905669093132, 'timestamp': '2025-10-01 04:31:46.099443', 'step': 15202, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:31:46.130223', 'step': 15202, 'epoch': 3} {'type': 'loss', 'content': 0.05918165668845177, 'timestamp': '2025-10-01 04:31:46.132381', 'step': 15203, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:31:46.163645', 'step': 15203, 'epoch': 3} {'type': 'loss', 'content': 0.029938150197267532, 'timestamp': '2025-10-01 04:31:46.187215', 'step': 15204, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:31:46.217215', 'step': 15204, 'epoch': 3} {'type': 'loss', 'content': 0.12328144907951355, 'timestamp': '2025-10-01 04:31:46.220124', 'step': 15205, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:46.253196', 'step': 15205, 'epoch': 3} {'type': 'loss', 'content': 0.16444724798202515, 'timestamp': '2025-10-01 04:31:46.255234', 'step': 15206, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:46.285898', 'step': 15206, 'epoch': 3} {'type': 'loss', 'content': 0.19922885298728943, 'timestamp': '2025-10-01 04:31:46.288018', 'step': 15207, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:46.317782', 'step': 15207, 'epoch': 3} {'type': 'loss', 'content': 0.1643560528755188, 'timestamp': '2025-10-01 04:31:46.341543', 'step': 15208, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:46.371796', 'step': 15208, 'epoch': 3} {'type': 'loss', 'content': 0.10138575732707977, 'timestamp': '2025-10-01 04:31:46.373805', 'step': 15209, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:31:46.404417', 'step': 15209, 'epoch': 3} {'type': 'loss', 'content': 0.06981787830591202, 'timestamp': '2025-10-01 04:31:46.406423', 'step': 15210, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:31:46.437210', 'step': 15210, 'epoch': 3} {'type': 'loss', 'content': 0.07356522232294083, 'timestamp': '2025-10-01 04:31:46.439585', 'step': 15211, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:46.470652', 'step': 15211, 'epoch': 3} {'type': 'loss', 'content': 0.1514335572719574, 'timestamp': '2025-10-01 04:31:46.494383', 'step': 15212, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:31:46.524448', 'step': 15212, 'epoch': 3} {'type': 'loss', 'content': 0.15643100440502167, 'timestamp': '2025-10-01 04:31:46.526560', 'step': 15213, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:46.556293', 'step': 15213, 'epoch': 3} {'type': 'loss', 'content': 0.08973897993564606, 'timestamp': '2025-10-01 04:31:46.558402', 'step': 15214, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:46.588466', 'step': 15214, 'epoch': 3} {'type': 'loss', 'content': 0.10899960249662399, 'timestamp': '2025-10-01 04:31:46.590689', 'step': 15215, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:46.620553', 'step': 15215, 'epoch': 3} {'type': 'loss', 'content': 0.045967947691679, 'timestamp': '2025-10-01 04:31:46.644170', 'step': 15216, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:31:46.674530', 'step': 15216, 'epoch': 3} {'type': 'loss', 'content': 0.06647498160600662, 'timestamp': '2025-10-01 04:31:46.676777', 'step': 15217, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:46.707597', 'step': 15217, 'epoch': 3} {'type': 'loss', 'content': 0.0867382362484932, 'timestamp': '2025-10-01 04:31:46.709850', 'step': 15218, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:46.739992', 'step': 15218, 'epoch': 3} {'type': 'loss', 'content': 0.10791851580142975, 'timestamp': '2025-10-01 04:31:46.742193', 'step': 15219, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:46.773835', 'step': 15219, 'epoch': 3} {'type': 'loss', 'content': 0.10699017345905304, 'timestamp': '2025-10-01 04:31:46.797545', 'step': 15220, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:46.827208', 'step': 15220, 'epoch': 3} {'type': 'loss', 'content': 0.09760329127311707, 'timestamp': '2025-10-01 04:31:46.829350', 'step': 15221, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:31:46.859330', 'step': 15221, 'epoch': 3} {'type': 'loss', 'content': 0.10048265010118484, 'timestamp': '2025-10-01 04:31:46.861756', 'step': 15222, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:46.891602', 'step': 15222, 'epoch': 3} {'type': 'loss', 'content': 0.09451944380998611, 'timestamp': '2025-10-01 04:31:46.893942', 'step': 15223, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:46.923109', 'step': 15223, 'epoch': 3} {'type': 'loss', 'content': 0.12142381072044373, 'timestamp': '2025-10-01 04:31:46.946840', 'step': 15224, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 12814563338304}, 'timestamp': '2025-10-01 04:31:46.983871', 'step': 15224, 'epoch': 3} {'type': 'loss', 'content': 0.061437420547008514, 'timestamp': '2025-10-01 04:31:46.999554', 'step': 15225, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:47.030283', 'step': 15225, 'epoch': 3} {'type': 'loss', 'content': 0.13839636743068695, 'timestamp': '2025-10-01 04:31:47.032606', 'step': 15226, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:47.063102', 'step': 15226, 'epoch': 3} {'type': 'loss', 'content': 0.0743362084031105, 'timestamp': '2025-10-01 04:31:47.065482', 'step': 15227, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:47.108842', 'step': 15227, 'epoch': 3} {'type': 'loss', 'content': 0.0924367755651474, 'timestamp': '2025-10-01 04:31:47.132531', 'step': 15228, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:47.162657', 'step': 15228, 'epoch': 3} {'type': 'loss', 'content': 0.08405788987874985, 'timestamp': '2025-10-01 04:31:47.175711', 'step': 15229, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:31:47.205763', 'step': 15229, 'epoch': 3} {'type': 'loss', 'content': 0.10033026337623596, 'timestamp': '2025-10-01 04:31:47.207991', 'step': 15230, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:47.238365', 'step': 15230, 'epoch': 3} {'type': 'loss', 'content': 0.1311531662940979, 'timestamp': '2025-10-01 04:31:47.243814', 'step': 15231, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:47.274449', 'step': 15231, 'epoch': 3} {'type': 'loss', 'content': 0.06448432803153992, 'timestamp': '2025-10-01 04:31:47.297988', 'step': 15232, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:47.329627', 'step': 15232, 'epoch': 3} {'type': 'loss', 'content': 0.041858796030282974, 'timestamp': '2025-10-01 04:31:47.331660', 'step': 15233, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:47.361610', 'step': 15233, 'epoch': 3} {'type': 'loss', 'content': 0.06622802466154099, 'timestamp': '2025-10-01 04:31:47.363915', 'step': 15234, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:47.407872', 'step': 15234, 'epoch': 3} {'type': 'loss', 'content': 0.045417699962854385, 'timestamp': '2025-10-01 04:31:47.410011', 'step': 15235, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:47.440239', 'step': 15235, 'epoch': 3} {'type': 'loss', 'content': 0.06793931871652603, 'timestamp': '2025-10-01 04:31:47.463807', 'step': 15236, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:47.495428', 'step': 15236, 'epoch': 3} {'type': 'loss', 'content': 0.07611002027988434, 'timestamp': '2025-10-01 04:31:47.497421', 'step': 15237, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:47.527259', 'step': 15237, 'epoch': 3} {'type': 'loss', 'content': 0.08351998031139374, 'timestamp': '2025-10-01 04:31:47.529471', 'step': 15238, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:47.559301', 'step': 15238, 'epoch': 3} {'type': 'loss', 'content': 0.09281948953866959, 'timestamp': '2025-10-01 04:31:47.561468', 'step': 15239, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:47.592861', 'step': 15239, 'epoch': 3} {'type': 'loss', 'content': 0.09791615605354309, 'timestamp': '2025-10-01 04:31:47.616735', 'step': 15240, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:47.648031', 'step': 15240, 'epoch': 3} {'type': 'loss', 'content': 0.17332422733306885, 'timestamp': '2025-10-01 04:31:47.650373', 'step': 15241, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:47.681801', 'step': 15241, 'epoch': 3} {'type': 'loss', 'content': 0.1108156070113182, 'timestamp': '2025-10-01 04:31:47.683949', 'step': 15242, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:47.713881', 'step': 15242, 'epoch': 3} {'type': 'loss', 'content': 0.05065501853823662, 'timestamp': '2025-10-01 04:31:47.716057', 'step': 15243, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:47.745675', 'step': 15243, 'epoch': 3} {'type': 'loss', 'content': 0.06927406787872314, 'timestamp': '2025-10-01 04:31:47.784070', 'step': 15244, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:31:47.815443', 'step': 15244, 'epoch': 3} {'type': 'loss', 'content': 0.07607047259807587, 'timestamp': '2025-10-01 04:31:47.819187', 'step': 15245, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:47.859829', 'step': 15245, 'epoch': 3} {'type': 'loss', 'content': 0.18352898955345154, 'timestamp': '2025-10-01 04:31:47.861950', 'step': 15246, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:47.903905', 'step': 15246, 'epoch': 3} {'type': 'loss', 'content': 0.08083243668079376, 'timestamp': '2025-10-01 04:31:47.906229', 'step': 15247, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:47.935995', 'step': 15247, 'epoch': 3} {'type': 'loss', 'content': 0.13729646801948547, 'timestamp': '2025-10-01 04:31:47.959710', 'step': 15248, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:47.989422', 'step': 15248, 'epoch': 3} {'type': 'loss', 'content': 0.10191380977630615, 'timestamp': '2025-10-01 04:31:47.992126', 'step': 15249, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:31:48.021976', 'step': 15249, 'epoch': 3} {'type': 'loss', 'content': 0.04747052118182182, 'timestamp': '2025-10-01 04:31:48.024381', 'step': 15250, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:31:48.054871', 'step': 15250, 'epoch': 3} {'type': 'loss', 'content': 0.1490194946527481, 'timestamp': '2025-10-01 04:31:48.059836', 'step': 15251, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:48.096302', 'step': 15251, 'epoch': 3} {'type': 'loss', 'content': 0.08906672149896622, 'timestamp': '2025-10-01 04:31:48.121563', 'step': 15252, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:48.163559', 'step': 15252, 'epoch': 3} {'type': 'loss', 'content': 0.09546743333339691, 'timestamp': '2025-10-01 04:31:48.165850', 'step': 15253, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:48.204070', 'step': 15253, 'epoch': 3} {'type': 'loss', 'content': 0.08887644112110138, 'timestamp': '2025-10-01 04:31:48.206459', 'step': 15254, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:48.237304', 'step': 15254, 'epoch': 3} {'type': 'loss', 'content': 0.04680429771542549, 'timestamp': '2025-10-01 04:31:48.239622', 'step': 15255, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:48.270384', 'step': 15255, 'epoch': 3} {'type': 'loss', 'content': 0.09118344634771347, 'timestamp': '2025-10-01 04:31:48.293872', 'step': 15256, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:48.323681', 'step': 15256, 'epoch': 3} {'type': 'loss', 'content': 0.10831736028194427, 'timestamp': '2025-10-01 04:31:48.325789', 'step': 15257, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:31:48.356606', 'step': 15257, 'epoch': 3} {'type': 'loss', 'content': 0.0465967170894146, 'timestamp': '2025-10-01 04:31:48.373964', 'step': 15258, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:31:48.404240', 'step': 15258, 'epoch': 3} {'type': 'loss', 'content': 0.07838881760835648, 'timestamp': '2025-10-01 04:31:48.406894', 'step': 15259, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:48.437016', 'step': 15259, 'epoch': 3} {'type': 'loss', 'content': 0.06693577021360397, 'timestamp': '2025-10-01 04:31:48.461076', 'step': 15260, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:48.493612', 'step': 15260, 'epoch': 3} {'type': 'loss', 'content': 0.10024836659431458, 'timestamp': '2025-10-01 04:31:48.495726', 'step': 15261, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:48.528452', 'step': 15261, 'epoch': 3} {'type': 'loss', 'content': 0.08141089975833893, 'timestamp': '2025-10-01 04:31:48.530651', 'step': 15262, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:48.560623', 'step': 15262, 'epoch': 3} {'type': 'loss', 'content': 0.07350785285234451, 'timestamp': '2025-10-01 04:31:48.563000', 'step': 15263, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:48.594780', 'step': 15263, 'epoch': 3} {'type': 'loss', 'content': 0.06052052229642868, 'timestamp': '2025-10-01 04:31:48.618795', 'step': 15264, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:31:48.652131', 'step': 15264, 'epoch': 3} {'type': 'loss', 'content': 0.15450207889080048, 'timestamp': '2025-10-01 04:31:48.654499', 'step': 15265, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:48.688522', 'step': 15265, 'epoch': 3} {'type': 'loss', 'content': 0.08875124901533127, 'timestamp': '2025-10-01 04:31:48.691072', 'step': 15266, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:48.724624', 'step': 15266, 'epoch': 3} {'type': 'loss', 'content': 0.13292159140110016, 'timestamp': '2025-10-01 04:31:48.726908', 'step': 15267, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:48.761524', 'step': 15267, 'epoch': 3} {'type': 'loss', 'content': 0.05408928915858269, 'timestamp': '2025-10-01 04:31:48.785122', 'step': 15268, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:48.817681', 'step': 15268, 'epoch': 3} {'type': 'loss', 'content': 0.07070616632699966, 'timestamp': '2025-10-01 04:31:48.823849', 'step': 15269, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:48.854704', 'step': 15269, 'epoch': 3} {'type': 'loss', 'content': 0.042218953371047974, 'timestamp': '2025-10-01 04:31:48.857339', 'step': 15270, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:48.888669', 'step': 15270, 'epoch': 3} {'type': 'loss', 'content': 0.08222059160470963, 'timestamp': '2025-10-01 04:31:48.891041', 'step': 15271, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:48.923262', 'step': 15271, 'epoch': 3} {'type': 'loss', 'content': 0.06577173620462418, 'timestamp': '2025-10-01 04:31:48.947212', 'step': 15272, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:31:48.980247', 'step': 15272, 'epoch': 3} {'type': 'loss', 'content': 0.05652550980448723, 'timestamp': '2025-10-01 04:31:48.982701', 'step': 15273, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:49.013521', 'step': 15273, 'epoch': 3} {'type': 'loss', 'content': 0.05042953044176102, 'timestamp': '2025-10-01 04:31:49.016387', 'step': 15274, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:49.048088', 'step': 15274, 'epoch': 3} {'type': 'loss', 'content': 0.0260018240660429, 'timestamp': '2025-10-01 04:31:49.050422', 'step': 15275, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:49.081112', 'step': 15275, 'epoch': 3} {'type': 'loss', 'content': 0.19874736666679382, 'timestamp': '2025-10-01 04:31:49.104748', 'step': 15276, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:31:49.140459', 'step': 15276, 'epoch': 3} {'type': 'loss', 'content': 0.12751148641109467, 'timestamp': '2025-10-01 04:31:49.142607', 'step': 15277, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:49.174688', 'step': 15277, 'epoch': 3} {'type': 'loss', 'content': 0.04373740404844284, 'timestamp': '2025-10-01 04:31:49.185026', 'step': 15278, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:49.219164', 'step': 15278, 'epoch': 3} {'type': 'loss', 'content': 0.04918281361460686, 'timestamp': '2025-10-01 04:31:49.223443', 'step': 15279, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:49.272310', 'step': 15279, 'epoch': 3} {'type': 'loss', 'content': 0.14594054222106934, 'timestamp': '2025-10-01 04:31:49.300884', 'step': 15280, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:49.337213', 'step': 15280, 'epoch': 3} {'type': 'loss', 'content': 0.029213329777121544, 'timestamp': '2025-10-01 04:31:49.343998', 'step': 15281, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:49.375007', 'step': 15281, 'epoch': 3} {'type': 'loss', 'content': 0.06145075336098671, 'timestamp': '2025-10-01 04:31:49.377441', 'step': 15282, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:49.415819', 'step': 15282, 'epoch': 3} {'type': 'loss', 'content': 0.03672000765800476, 'timestamp': '2025-10-01 04:31:49.419829', 'step': 15283, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:49.461781', 'step': 15283, 'epoch': 3} {'type': 'loss', 'content': 0.05581427738070488, 'timestamp': '2025-10-01 04:31:49.486757', 'step': 15284, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:31:49.525256', 'step': 15284, 'epoch': 3} {'type': 'loss', 'content': 0.1296975314617157, 'timestamp': '2025-10-01 04:31:49.527456', 'step': 15285, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:49.558411', 'step': 15285, 'epoch': 3} {'type': 'loss', 'content': 0.05356496945023537, 'timestamp': '2025-10-01 04:31:49.561367', 'step': 15286, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:31:49.594121', 'step': 15286, 'epoch': 3} {'type': 'loss', 'content': 0.2076713889837265, 'timestamp': '2025-10-01 04:31:49.597079', 'step': 15287, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:49.629556', 'step': 15287, 'epoch': 3} {'type': 'loss', 'content': 0.08371444791555405, 'timestamp': '2025-10-01 04:31:49.654182', 'step': 15288, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:49.685970', 'step': 15288, 'epoch': 3} {'type': 'loss', 'content': 0.11305958032608032, 'timestamp': '2025-10-01 04:31:49.689038', 'step': 15289, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:49.724336', 'step': 15289, 'epoch': 3} {'type': 'loss', 'content': 0.03301504626870155, 'timestamp': '2025-10-01 04:31:49.726760', 'step': 15290, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:49.757057', 'step': 15290, 'epoch': 3} {'type': 'loss', 'content': 0.06001072749495506, 'timestamp': '2025-10-01 04:31:49.760066', 'step': 15291, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:49.797986', 'step': 15291, 'epoch': 3} {'type': 'loss', 'content': 0.07482679188251495, 'timestamp': '2025-10-01 04:31:49.825131', 'step': 15292, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:31:49.856265', 'step': 15292, 'epoch': 3} {'type': 'loss', 'content': 0.08928371965885162, 'timestamp': '2025-10-01 04:31:49.860128', 'step': 15293, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:49.893199', 'step': 15293, 'epoch': 3} {'type': 'loss', 'content': 0.07836742699146271, 'timestamp': '2025-10-01 04:31:49.896934', 'step': 15294, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:49.928882', 'step': 15294, 'epoch': 3} {'type': 'loss', 'content': 0.10131308436393738, 'timestamp': '2025-10-01 04:31:49.938978', 'step': 15295, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:49.980368', 'step': 15295, 'epoch': 3} {'type': 'loss', 'content': 0.06268953531980515, 'timestamp': '2025-10-01 04:31:50.007601', 'step': 15296, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:50.039802', 'step': 15296, 'epoch': 3} {'type': 'loss', 'content': 0.0993906632065773, 'timestamp': '2025-10-01 04:31:50.042063', 'step': 15297, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:50.084274', 'step': 15297, 'epoch': 3} {'type': 'loss', 'content': 0.04188062995672226, 'timestamp': '2025-10-01 04:31:50.093353', 'step': 15298, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:50.128023', 'step': 15298, 'epoch': 3} {'type': 'loss', 'content': 0.1139514371752739, 'timestamp': '2025-10-01 04:31:50.133966', 'step': 15299, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:50.210153', 'step': 15299, 'epoch': 3} {'type': 'loss', 'content': 0.13619719445705414, 'timestamp': '2025-10-01 04:31:50.235301', 'step': 15300, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:50.270161', 'step': 15300, 'epoch': 3} {'type': 'loss', 'content': 0.14567148685455322, 'timestamp': '2025-10-01 04:31:50.272647', 'step': 15301, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:50.309621', 'step': 15301, 'epoch': 3} {'type': 'loss', 'content': 0.17417854070663452, 'timestamp': '2025-10-01 04:31:50.312079', 'step': 15302, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:50.346814', 'step': 15302, 'epoch': 3} {'type': 'loss', 'content': 0.07145374268293381, 'timestamp': '2025-10-01 04:31:50.349411', 'step': 15303, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:50.399678', 'step': 15303, 'epoch': 3} {'type': 'loss', 'content': 0.0826272964477539, 'timestamp': '2025-10-01 04:31:50.423209', 'step': 15304, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:50.458021', 'step': 15304, 'epoch': 3} {'type': 'loss', 'content': 0.07000480592250824, 'timestamp': '2025-10-01 04:31:50.460293', 'step': 15305, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:50.500316', 'step': 15305, 'epoch': 3} {'type': 'loss', 'content': 0.027088642120361328, 'timestamp': '2025-10-01 04:31:50.502424', 'step': 15306, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:50.535548', 'step': 15306, 'epoch': 3} {'type': 'loss', 'content': 0.03443538025021553, 'timestamp': '2025-10-01 04:31:50.538394', 'step': 15307, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:31:50.572512', 'step': 15307, 'epoch': 3} {'type': 'loss', 'content': 0.11574224382638931, 'timestamp': '2025-10-01 04:31:50.596797', 'step': 15308, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:50.632045', 'step': 15308, 'epoch': 3} {'type': 'loss', 'content': 0.10250924527645111, 'timestamp': '2025-10-01 04:31:50.634164', 'step': 15309, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:31:50.669225', 'step': 15309, 'epoch': 3} {'type': 'loss', 'content': 0.07495483756065369, 'timestamp': '2025-10-01 04:31:50.671422', 'step': 15310, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:50.719292', 'step': 15310, 'epoch': 3} {'type': 'loss', 'content': 0.10698265582323074, 'timestamp': '2025-10-01 04:31:50.721459', 'step': 15311, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:31:50.763792', 'step': 15311, 'epoch': 3} {'type': 'loss', 'content': 0.08700702339410782, 'timestamp': '2025-10-01 04:31:50.787536', 'step': 15312, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:50.821335', 'step': 15312, 'epoch': 3} {'type': 'loss', 'content': 0.08373034000396729, 'timestamp': '2025-10-01 04:31:50.824177', 'step': 15313, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:50.862467', 'step': 15313, 'epoch': 3} {'type': 'loss', 'content': 0.05835562199354172, 'timestamp': '2025-10-01 04:31:50.864634', 'step': 15314, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:31:50.906198', 'step': 15314, 'epoch': 3} {'type': 'loss', 'content': 0.17412132024765015, 'timestamp': '2025-10-01 04:31:50.908643', 'step': 15315, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:50.959470', 'step': 15315, 'epoch': 3} {'type': 'loss', 'content': 0.09618967026472092, 'timestamp': '2025-10-01 04:31:50.983026', 'step': 15316, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:51.015174', 'step': 15316, 'epoch': 3} {'type': 'loss', 'content': 0.09108378738164902, 'timestamp': '2025-10-01 04:31:51.017238', 'step': 15317, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:51.051430', 'step': 15317, 'epoch': 3} {'type': 'loss', 'content': 0.1707499921321869, 'timestamp': '2025-10-01 04:31:51.053540', 'step': 15318, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:51.090769', 'step': 15318, 'epoch': 3} {'type': 'loss', 'content': 0.05824677273631096, 'timestamp': '2025-10-01 04:31:51.095732', 'step': 15319, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:51.139089', 'step': 15319, 'epoch': 3} {'type': 'loss', 'content': 0.11183778941631317, 'timestamp': '2025-10-01 04:31:51.162725', 'step': 15320, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:31:51.195350', 'step': 15320, 'epoch': 3} {'type': 'loss', 'content': 0.0893477126955986, 'timestamp': '2025-10-01 04:31:51.201434', 'step': 15321, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:51.233871', 'step': 15321, 'epoch': 3} {'type': 'loss', 'content': 0.07781703770160675, 'timestamp': '2025-10-01 04:31:51.236098', 'step': 15322, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:31:51.268070', 'step': 15322, 'epoch': 3} {'type': 'loss', 'content': 0.09133420884609222, 'timestamp': '2025-10-01 04:31:51.270461', 'step': 15323, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:51.305135', 'step': 15323, 'epoch': 3} {'type': 'loss', 'content': 0.019666191190481186, 'timestamp': '2025-10-01 04:31:51.328889', 'step': 15324, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:51.359997', 'step': 15324, 'epoch': 3} {'type': 'loss', 'content': 0.15350262820720673, 'timestamp': '2025-10-01 04:31:51.362167', 'step': 15325, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:31:51.399427', 'step': 15325, 'epoch': 3} {'type': 'loss', 'content': 0.06841683387756348, 'timestamp': '2025-10-01 04:31:51.401912', 'step': 15326, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:51.447256', 'step': 15326, 'epoch': 3} {'type': 'loss', 'content': 0.053036049008369446, 'timestamp': '2025-10-01 04:31:51.449510', 'step': 15327, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:31:51.491151', 'step': 15327, 'epoch': 3} {'type': 'loss', 'content': 0.07350736856460571, 'timestamp': '2025-10-01 04:31:51.515009', 'step': 15328, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:31:51.546858', 'step': 15328, 'epoch': 3} {'type': 'loss', 'content': 0.0767163634300232, 'timestamp': '2025-10-01 04:31:51.549201', 'step': 15329, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:51.592917', 'step': 15329, 'epoch': 3} {'type': 'loss', 'content': 0.08235494792461395, 'timestamp': '2025-10-01 04:31:51.595033', 'step': 15330, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:51.638326', 'step': 15330, 'epoch': 3} {'type': 'loss', 'content': 0.11553440243005753, 'timestamp': '2025-10-01 04:31:51.640403', 'step': 15331, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:31:51.672997', 'step': 15331, 'epoch': 3} {'type': 'loss', 'content': 0.12474672496318817, 'timestamp': '2025-10-01 04:31:51.696675', 'step': 15332, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:51.730868', 'step': 15332, 'epoch': 3} {'type': 'loss', 'content': 0.06398036330938339, 'timestamp': '2025-10-01 04:31:51.732948', 'step': 15333, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:51.771813', 'step': 15333, 'epoch': 3} {'type': 'loss', 'content': 0.048530325293540955, 'timestamp': '2025-10-01 04:31:51.774012', 'step': 15334, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:51.805928', 'step': 15334, 'epoch': 3} {'type': 'loss', 'content': 0.045235030353069305, 'timestamp': '2025-10-01 04:31:51.808088', 'step': 15335, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:51.850573', 'step': 15335, 'epoch': 3} {'type': 'loss', 'content': 0.02680979110300541, 'timestamp': '2025-10-01 04:31:51.874341', 'step': 15336, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:51.911614', 'step': 15336, 'epoch': 3} {'type': 'loss', 'content': 0.04180669039487839, 'timestamp': '2025-10-01 04:31:51.913736', 'step': 15337, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:51.944027', 'step': 15337, 'epoch': 3} {'type': 'loss', 'content': 0.05941874161362648, 'timestamp': '2025-10-01 04:31:51.946216', 'step': 15338, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:31:51.982289', 'step': 15338, 'epoch': 3} {'type': 'loss', 'content': 0.049888014793395996, 'timestamp': '2025-10-01 04:31:51.984615', 'step': 15339, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:52.022786', 'step': 15339, 'epoch': 3} {'type': 'loss', 'content': 0.0860358402132988, 'timestamp': '2025-10-01 04:31:52.046530', 'step': 15340, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:52.078056', 'step': 15340, 'epoch': 3} {'type': 'loss', 'content': 0.08147799223661423, 'timestamp': '2025-10-01 04:31:52.080466', 'step': 15341, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:52.113258', 'step': 15341, 'epoch': 3} {'type': 'loss', 'content': 0.1396113485097885, 'timestamp': '2025-10-01 04:31:52.115553', 'step': 15342, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:52.155814', 'step': 15342, 'epoch': 3} {'type': 'loss', 'content': 0.06596454232931137, 'timestamp': '2025-10-01 04:31:52.158134', 'step': 15343, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:52.189796', 'step': 15343, 'epoch': 3} {'type': 'loss', 'content': 0.06908644735813141, 'timestamp': '2025-10-01 04:31:52.214153', 'step': 15344, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:52.249608', 'step': 15344, 'epoch': 3} {'type': 'loss', 'content': 0.03865985944867134, 'timestamp': '2025-10-01 04:31:52.252668', 'step': 15345, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:52.285667', 'step': 15345, 'epoch': 3} {'type': 'loss', 'content': 0.09902458637952805, 'timestamp': '2025-10-01 04:31:52.287842', 'step': 15346, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:52.319443', 'step': 15346, 'epoch': 3} {'type': 'loss', 'content': 0.10203230381011963, 'timestamp': '2025-10-01 04:31:52.321464', 'step': 15347, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:31:52.361179', 'step': 15347, 'epoch': 3} {'type': 'loss', 'content': 0.09630394726991653, 'timestamp': '2025-10-01 04:31:52.384889', 'step': 15348, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:52.424367', 'step': 15348, 'epoch': 3} {'type': 'loss', 'content': 0.05388566479086876, 'timestamp': '2025-10-01 04:31:52.426517', 'step': 15349, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:52.458329', 'step': 15349, 'epoch': 3} {'type': 'loss', 'content': 0.012608399614691734, 'timestamp': '2025-10-01 04:31:52.460367', 'step': 15350, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:31:52.494666', 'step': 15350, 'epoch': 3} {'type': 'loss', 'content': 0.06677280366420746, 'timestamp': '2025-10-01 04:31:52.497687', 'step': 15351, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:52.540695', 'step': 15351, 'epoch': 3} {'type': 'loss', 'content': 0.07768916338682175, 'timestamp': '2025-10-01 04:31:52.564701', 'step': 15352, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:52.595827', 'step': 15352, 'epoch': 3} {'type': 'loss', 'content': 0.0478457510471344, 'timestamp': '2025-10-01 04:31:52.599411', 'step': 15353, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:52.636079', 'step': 15353, 'epoch': 3} {'type': 'loss', 'content': 0.04193180426955223, 'timestamp': '2025-10-01 04:31:52.644626', 'step': 15354, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:52.674988', 'step': 15354, 'epoch': 3} {'type': 'loss', 'content': 0.10730015486478806, 'timestamp': '2025-10-01 04:31:52.677235', 'step': 15355, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:52.717337', 'step': 15355, 'epoch': 3} {'type': 'loss', 'content': 0.046217143535614014, 'timestamp': '2025-10-01 04:31:52.741268', 'step': 15356, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:52.779550', 'step': 15356, 'epoch': 3} {'type': 'loss', 'content': 0.13082405924797058, 'timestamp': '2025-10-01 04:31:52.782050', 'step': 15357, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:52.814684', 'step': 15357, 'epoch': 3} {'type': 'loss', 'content': 0.11972139030694962, 'timestamp': '2025-10-01 04:31:52.816831', 'step': 15358, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:31:52.859272', 'step': 15358, 'epoch': 3} {'type': 'loss', 'content': 0.040793243795633316, 'timestamp': '2025-10-01 04:31:52.862004', 'step': 15359, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:52.898577', 'step': 15359, 'epoch': 3} {'type': 'loss', 'content': 0.10316276550292969, 'timestamp': '2025-10-01 04:31:52.922097', 'step': 15360, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:52.962647', 'step': 15360, 'epoch': 3} {'type': 'loss', 'content': 0.07034047693014145, 'timestamp': '2025-10-01 04:31:52.964724', 'step': 15361, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:52.997084', 'step': 15361, 'epoch': 3} {'type': 'loss', 'content': 0.038893964141607285, 'timestamp': '2025-10-01 04:31:52.999113', 'step': 15362, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:53.039225', 'step': 15362, 'epoch': 3} {'type': 'loss', 'content': 0.08655087649822235, 'timestamp': '2025-10-01 04:31:53.041591', 'step': 15363, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:53.075052', 'step': 15363, 'epoch': 3} {'type': 'loss', 'content': 0.12691165506839752, 'timestamp': '2025-10-01 04:31:53.098564', 'step': 15364, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:53.144371', 'step': 15364, 'epoch': 3} {'type': 'loss', 'content': 0.10890419036149979, 'timestamp': '2025-10-01 04:31:53.146734', 'step': 15365, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:31:53.197124', 'step': 15365, 'epoch': 3} {'type': 'loss', 'content': 0.13930527865886688, 'timestamp': '2025-10-01 04:31:53.201329', 'step': 15366, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:31:53.245329', 'step': 15366, 'epoch': 3} {'type': 'loss', 'content': 0.11441818624734879, 'timestamp': '2025-10-01 04:31:53.250414', 'step': 15367, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:31:53.283368', 'step': 15367, 'epoch': 3} {'type': 'loss', 'content': 0.08291807770729065, 'timestamp': '2025-10-01 04:31:53.307199', 'step': 15368, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:53.339016', 'step': 15368, 'epoch': 3} {'type': 'loss', 'content': 0.11014579981565475, 'timestamp': '2025-10-01 04:31:53.341103', 'step': 15369, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:53.374097', 'step': 15369, 'epoch': 3} {'type': 'loss', 'content': 0.09524162858724594, 'timestamp': '2025-10-01 04:31:53.376493', 'step': 15370, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:31:53.408674', 'step': 15370, 'epoch': 3} {'type': 'loss', 'content': 0.07999464124441147, 'timestamp': '2025-10-01 04:31:53.410898', 'step': 15371, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:31:53.443477', 'step': 15371, 'epoch': 3} {'type': 'loss', 'content': 0.08279312402009964, 'timestamp': '2025-10-01 04:31:53.467009', 'step': 15372, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:53.503712', 'step': 15372, 'epoch': 3} {'type': 'loss', 'content': 0.09297129511833191, 'timestamp': '2025-10-01 04:31:53.508198', 'step': 15373, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:31:53.539704', 'step': 15373, 'epoch': 3} {'type': 'loss', 'content': 0.13910074532032013, 'timestamp': '2025-10-01 04:31:53.542696', 'step': 15374, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:31:53.592324', 'step': 15374, 'epoch': 3} {'type': 'loss', 'content': 0.11112517863512039, 'timestamp': '2025-10-01 04:31:53.606965', 'step': 15375, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:53.640928', 'step': 15375, 'epoch': 3} {'type': 'loss', 'content': 0.05558167025446892, 'timestamp': '2025-10-01 04:31:53.664502', 'step': 15376, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:31:53.695754', 'step': 15376, 'epoch': 3} {'type': 'loss', 'content': 0.10987870395183563, 'timestamp': '2025-10-01 04:31:53.697827', 'step': 15377, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:31:53.737891', 'step': 15377, 'epoch': 3} {'type': 'loss', 'content': 0.053677428513765335, 'timestamp': '2025-10-01 04:31:53.740442', 'step': 15378, 'epoch': 3} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 949202279808}], 'timestamp': '2025-10-01 04:32:05.147415', 'step': 15378, 'epoch': 3} {'type': 'pplx', 'content': 10840.630363153332, 'timestamp': '2025-10-01 04:32:05.150166', 'step': 15378, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:05.182871', 'step': 15378, 'epoch': 3} {'type': 'loss', 'content': 0.05359742417931557, 'timestamp': '2025-10-01 04:32:05.185213', 'step': 15379, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:05.218326', 'step': 15379, 'epoch': 3} {'type': 'loss', 'content': 0.1590149849653244, 'timestamp': '2025-10-01 04:32:05.242239', 'step': 15380, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:05.282750', 'step': 15380, 'epoch': 3} {'type': 'loss', 'content': 0.08308926969766617, 'timestamp': '2025-10-01 04:32:05.284902', 'step': 15381, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:05.322868', 'step': 15381, 'epoch': 3} {'type': 'loss', 'content': 0.1400710642337799, 'timestamp': '2025-10-01 04:32:05.326019', 'step': 15382, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:05.359964', 'step': 15382, 'epoch': 3} {'type': 'loss', 'content': 0.020743276923894882, 'timestamp': '2025-10-01 04:32:05.362575', 'step': 15383, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:05.396757', 'step': 15383, 'epoch': 3} {'type': 'loss', 'content': 0.1187419593334198, 'timestamp': '2025-10-01 04:32:05.420587', 'step': 15384, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:32:05.460135', 'step': 15384, 'epoch': 3} {'type': 'loss', 'content': 0.061704304069280624, 'timestamp': '2025-10-01 04:32:05.462538', 'step': 15385, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:05.494434', 'step': 15385, 'epoch': 3} {'type': 'loss', 'content': 0.06902845948934555, 'timestamp': '2025-10-01 04:32:05.498439', 'step': 15386, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:05.529656', 'step': 15386, 'epoch': 3} {'type': 'loss', 'content': 0.04387993738055229, 'timestamp': '2025-10-01 04:32:05.533083', 'step': 15387, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:05.564337', 'step': 15387, 'epoch': 3} {'type': 'loss', 'content': 0.10201753675937653, 'timestamp': '2025-10-01 04:32:05.590804', 'step': 15388, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:05.622762', 'step': 15388, 'epoch': 3} {'type': 'loss', 'content': 0.09235671907663345, 'timestamp': '2025-10-01 04:32:05.625967', 'step': 15389, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:05.658888', 'step': 15389, 'epoch': 3} {'type': 'loss', 'content': 0.044052161276340485, 'timestamp': '2025-10-01 04:32:05.661697', 'step': 15390, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:05.692839', 'step': 15390, 'epoch': 3} {'type': 'loss', 'content': 0.18907839059829712, 'timestamp': '2025-10-01 04:32:05.695715', 'step': 15391, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:05.726300', 'step': 15391, 'epoch': 3} {'type': 'loss', 'content': 0.07504040747880936, 'timestamp': '2025-10-01 04:32:05.750546', 'step': 15392, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:05.782972', 'step': 15392, 'epoch': 3} {'type': 'loss', 'content': 0.043647971004247665, 'timestamp': '2025-10-01 04:32:05.786193', 'step': 15393, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:05.818062', 'step': 15393, 'epoch': 3} {'type': 'loss', 'content': 0.022191712632775307, 'timestamp': '2025-10-01 04:32:05.821807', 'step': 15394, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:05.855138', 'step': 15394, 'epoch': 3} {'type': 'loss', 'content': 0.09527760744094849, 'timestamp': '2025-10-01 04:32:05.857551', 'step': 15395, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:05.889712', 'step': 15395, 'epoch': 3} {'type': 'loss', 'content': 0.03901026025414467, 'timestamp': '2025-10-01 04:32:05.913634', 'step': 15396, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:05.944879', 'step': 15396, 'epoch': 3} {'type': 'loss', 'content': 0.1077541932463646, 'timestamp': '2025-10-01 04:32:05.947280', 'step': 15397, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:05.979267', 'step': 15397, 'epoch': 3} {'type': 'loss', 'content': 0.07415184378623962, 'timestamp': '2025-10-01 04:32:05.981570', 'step': 15398, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:32:06.012852', 'step': 15398, 'epoch': 3} {'type': 'loss', 'content': 0.08925452828407288, 'timestamp': '2025-10-01 04:32:06.015546', 'step': 15399, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:06.046944', 'step': 15399, 'epoch': 3} {'type': 'loss', 'content': 0.18303996324539185, 'timestamp': '2025-10-01 04:32:06.071150', 'step': 15400, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:32:06.103160', 'step': 15400, 'epoch': 3} {'type': 'loss', 'content': 0.11432096362113953, 'timestamp': '2025-10-01 04:32:06.114041', 'step': 15401, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:06.145099', 'step': 15401, 'epoch': 3} {'type': 'loss', 'content': 0.09774110466241837, 'timestamp': '2025-10-01 04:32:06.147331', 'step': 15402, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:32:06.181721', 'step': 15402, 'epoch': 3} {'type': 'loss', 'content': 0.08085815608501434, 'timestamp': '2025-10-01 04:32:06.184765', 'step': 15403, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:06.215841', 'step': 15403, 'epoch': 3} {'type': 'loss', 'content': 0.08495045453310013, 'timestamp': '2025-10-01 04:32:06.239864', 'step': 15404, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:06.271418', 'step': 15404, 'epoch': 3} {'type': 'loss', 'content': 0.05893619731068611, 'timestamp': '2025-10-01 04:32:06.273841', 'step': 15405, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:06.304305', 'step': 15405, 'epoch': 3} {'type': 'loss', 'content': 0.037270281463861465, 'timestamp': '2025-10-01 04:32:06.306742', 'step': 15406, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:32:06.338018', 'step': 15406, 'epoch': 3} {'type': 'loss', 'content': 0.03075292520225048, 'timestamp': '2025-10-01 04:32:06.340473', 'step': 15407, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:06.374513', 'step': 15407, 'epoch': 3} {'type': 'loss', 'content': 0.12301596999168396, 'timestamp': '2025-10-01 04:32:06.398353', 'step': 15408, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:06.429759', 'step': 15408, 'epoch': 3} {'type': 'loss', 'content': 0.08754361420869827, 'timestamp': '2025-10-01 04:32:06.432647', 'step': 15409, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:06.465011', 'step': 15409, 'epoch': 3} {'type': 'loss', 'content': 0.09621789306402206, 'timestamp': '2025-10-01 04:32:06.467328', 'step': 15410, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:06.501081', 'step': 15410, 'epoch': 3} {'type': 'loss', 'content': 0.11505567282438278, 'timestamp': '2025-10-01 04:32:06.503489', 'step': 15411, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:06.537539', 'step': 15411, 'epoch': 3} {'type': 'loss', 'content': 0.0570141039788723, 'timestamp': '2025-10-01 04:32:06.561646', 'step': 15412, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:06.592733', 'step': 15412, 'epoch': 3} {'type': 'loss', 'content': 0.0435781329870224, 'timestamp': '2025-10-01 04:32:06.595131', 'step': 15413, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:06.635602', 'step': 15413, 'epoch': 3} {'type': 'loss', 'content': 0.07493754476308823, 'timestamp': '2025-10-01 04:32:06.639778', 'step': 15414, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:06.671676', 'step': 15414, 'epoch': 3} {'type': 'loss', 'content': 0.06974303722381592, 'timestamp': '2025-10-01 04:32:06.674177', 'step': 15415, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:32:06.705597', 'step': 15415, 'epoch': 3} {'type': 'loss', 'content': 0.14056724309921265, 'timestamp': '2025-10-01 04:32:06.729503', 'step': 15416, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:32:06.760871', 'step': 15416, 'epoch': 3} {'type': 'loss', 'content': 0.05647233873605728, 'timestamp': '2025-10-01 04:32:06.763411', 'step': 15417, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:06.795585', 'step': 15417, 'epoch': 3} {'type': 'loss', 'content': 0.040545836091041565, 'timestamp': '2025-10-01 04:32:06.798577', 'step': 15418, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:06.830393', 'step': 15418, 'epoch': 3} {'type': 'loss', 'content': 0.03818965330719948, 'timestamp': '2025-10-01 04:32:06.832632', 'step': 15419, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:06.863968', 'step': 15419, 'epoch': 3} {'type': 'loss', 'content': 0.022821230813860893, 'timestamp': '2025-10-01 04:32:06.889073', 'step': 15420, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:32:06.921011', 'step': 15420, 'epoch': 3} {'type': 'loss', 'content': 0.04081060364842415, 'timestamp': '2025-10-01 04:32:06.923141', 'step': 15421, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:06.953478', 'step': 15421, 'epoch': 3} {'type': 'loss', 'content': 0.11770196259021759, 'timestamp': '2025-10-01 04:32:06.955766', 'step': 15422, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:06.987746', 'step': 15422, 'epoch': 3} {'type': 'loss', 'content': 0.0511891171336174, 'timestamp': '2025-10-01 04:32:06.990320', 'step': 15423, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:07.022093', 'step': 15423, 'epoch': 3} {'type': 'loss', 'content': 0.060281820595264435, 'timestamp': '2025-10-01 04:32:07.045992', 'step': 15424, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:32:07.089711', 'step': 15424, 'epoch': 3} {'type': 'loss', 'content': 0.04481508955359459, 'timestamp': '2025-10-01 04:32:07.092655', 'step': 15425, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:07.129799', 'step': 15425, 'epoch': 3} {'type': 'loss', 'content': 0.05736968666315079, 'timestamp': '2025-10-01 04:32:07.139158', 'step': 15426, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:07.171688', 'step': 15426, 'epoch': 3} {'type': 'loss', 'content': 0.07062279433012009, 'timestamp': '2025-10-01 04:32:07.177252', 'step': 15427, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:07.208124', 'step': 15427, 'epoch': 3} {'type': 'loss', 'content': 0.08923175930976868, 'timestamp': '2025-10-01 04:32:07.231763', 'step': 15428, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:07.272450', 'step': 15428, 'epoch': 3} {'type': 'loss', 'content': 0.036941468715667725, 'timestamp': '2025-10-01 04:32:07.277536', 'step': 15429, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:07.309021', 'step': 15429, 'epoch': 3} {'type': 'loss', 'content': 0.02281131036579609, 'timestamp': '2025-10-01 04:32:07.311122', 'step': 15430, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:32:07.342600', 'step': 15430, 'epoch': 3} {'type': 'loss', 'content': 0.03447514399886131, 'timestamp': '2025-10-01 04:32:07.344640', 'step': 15431, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:07.376793', 'step': 15431, 'epoch': 3} {'type': 'loss', 'content': 0.038532935082912445, 'timestamp': '2025-10-01 04:32:07.400592', 'step': 15432, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:07.441395', 'step': 15432, 'epoch': 3} {'type': 'loss', 'content': 0.10995492339134216, 'timestamp': '2025-10-01 04:32:07.443520', 'step': 15433, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:07.480015', 'step': 15433, 'epoch': 3} {'type': 'loss', 'content': 0.07948852330446243, 'timestamp': '2025-10-01 04:32:07.483702', 'step': 15434, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:07.517483', 'step': 15434, 'epoch': 3} {'type': 'loss', 'content': 0.05933719128370285, 'timestamp': '2025-10-01 04:32:07.520043', 'step': 15435, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:07.550433', 'step': 15435, 'epoch': 3} {'type': 'loss', 'content': 0.053665947169065475, 'timestamp': '2025-10-01 04:32:07.574382', 'step': 15436, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:07.605465', 'step': 15436, 'epoch': 3} {'type': 'loss', 'content': 0.09351689368486404, 'timestamp': '2025-10-01 04:32:07.607671', 'step': 15437, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:07.640871', 'step': 15437, 'epoch': 3} {'type': 'loss', 'content': 0.05549251660704613, 'timestamp': '2025-10-01 04:32:07.643507', 'step': 15438, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:07.675069', 'step': 15438, 'epoch': 3} {'type': 'loss', 'content': 0.12483178824186325, 'timestamp': '2025-10-01 04:32:07.677362', 'step': 15439, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:32:07.708797', 'step': 15439, 'epoch': 3} {'type': 'loss', 'content': 0.058397118002176285, 'timestamp': '2025-10-01 04:32:07.732593', 'step': 15440, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:32:07.764775', 'step': 15440, 'epoch': 3} {'type': 'loss', 'content': 0.0448039248585701, 'timestamp': '2025-10-01 04:32:07.767379', 'step': 15441, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:07.800080', 'step': 15441, 'epoch': 3} {'type': 'loss', 'content': 0.1483285278081894, 'timestamp': '2025-10-01 04:32:07.802052', 'step': 15442, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:07.833274', 'step': 15442, 'epoch': 3} {'type': 'loss', 'content': 0.09397454559803009, 'timestamp': '2025-10-01 04:32:07.836128', 'step': 15443, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:07.866911', 'step': 15443, 'epoch': 3} {'type': 'loss', 'content': 0.013545427471399307, 'timestamp': '2025-10-01 04:32:07.894321', 'step': 15444, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:07.924918', 'step': 15444, 'epoch': 3} {'type': 'loss', 'content': 0.058196645230054855, 'timestamp': '2025-10-01 04:32:07.934092', 'step': 15445, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:32:07.966794', 'step': 15445, 'epoch': 3} {'type': 'loss', 'content': 0.10096365213394165, 'timestamp': '2025-10-01 04:32:07.969765', 'step': 15446, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:08.002114', 'step': 15446, 'epoch': 3} {'type': 'loss', 'content': 0.09312141686677933, 'timestamp': '2025-10-01 04:32:08.005096', 'step': 15447, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:32:08.036768', 'step': 15447, 'epoch': 3} {'type': 'loss', 'content': 0.09111379832029343, 'timestamp': '2025-10-01 04:32:08.061911', 'step': 15448, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:08.093382', 'step': 15448, 'epoch': 3} {'type': 'loss', 'content': 0.09647727757692337, 'timestamp': '2025-10-01 04:32:08.095742', 'step': 15449, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:08.128308', 'step': 15449, 'epoch': 3} {'type': 'loss', 'content': 0.10293703526258469, 'timestamp': '2025-10-01 04:32:08.131482', 'step': 15450, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:08.164544', 'step': 15450, 'epoch': 3} {'type': 'loss', 'content': 0.08019143342971802, 'timestamp': '2025-10-01 04:32:08.169563', 'step': 15451, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:08.208326', 'step': 15451, 'epoch': 3} {'type': 'loss', 'content': 0.0437203124165535, 'timestamp': '2025-10-01 04:32:08.234230', 'step': 15452, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-10-01 04:32:08.266406', 'step': 15452, 'epoch': 3} {'type': 'loss', 'content': 0.1310293972492218, 'timestamp': '2025-10-01 04:32:08.268549', 'step': 15453, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:32:08.299572', 'step': 15453, 'epoch': 3} {'type': 'loss', 'content': 0.03748885169625282, 'timestamp': '2025-10-01 04:32:08.303867', 'step': 15454, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:08.335404', 'step': 15454, 'epoch': 3} {'type': 'loss', 'content': 0.09891275316476822, 'timestamp': '2025-10-01 04:32:08.337502', 'step': 15455, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:32:08.368824', 'step': 15455, 'epoch': 3} {'type': 'loss', 'content': 0.14135588705539703, 'timestamp': '2025-10-01 04:32:08.392766', 'step': 15456, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:08.430216', 'step': 15456, 'epoch': 3} {'type': 'loss', 'content': 0.01945563405752182, 'timestamp': '2025-10-01 04:32:08.432154', 'step': 15457, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:08.462083', 'step': 15457, 'epoch': 3} {'type': 'loss', 'content': 0.04689515382051468, 'timestamp': '2025-10-01 04:32:08.464274', 'step': 15458, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:08.496351', 'step': 15458, 'epoch': 3} {'type': 'loss', 'content': 0.09621454775333405, 'timestamp': '2025-10-01 04:32:08.498547', 'step': 15459, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:08.529281', 'step': 15459, 'epoch': 3} {'type': 'loss', 'content': 0.040604736655950546, 'timestamp': '2025-10-01 04:32:08.553568', 'step': 15460, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:08.584264', 'step': 15460, 'epoch': 3} {'type': 'loss', 'content': 0.07744838297367096, 'timestamp': '2025-10-01 04:32:08.586755', 'step': 15461, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:08.618002', 'step': 15461, 'epoch': 3} {'type': 'loss', 'content': 0.06457734107971191, 'timestamp': '2025-10-01 04:32:08.620640', 'step': 15462, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:08.652221', 'step': 15462, 'epoch': 3} {'type': 'loss', 'content': 0.037854477763175964, 'timestamp': '2025-10-01 04:32:08.654546', 'step': 15463, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:08.686580', 'step': 15463, 'epoch': 3} {'type': 'loss', 'content': 0.024462593719363213, 'timestamp': '2025-10-01 04:32:08.710377', 'step': 15464, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:08.740985', 'step': 15464, 'epoch': 3} {'type': 'loss', 'content': 0.06932689994573593, 'timestamp': '2025-10-01 04:32:08.743598', 'step': 15465, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:08.774416', 'step': 15465, 'epoch': 3} {'type': 'loss', 'content': 0.0532412976026535, 'timestamp': '2025-10-01 04:32:08.776650', 'step': 15466, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:08.807220', 'step': 15466, 'epoch': 3} {'type': 'loss', 'content': 0.047156188637018204, 'timestamp': '2025-10-01 04:32:08.809344', 'step': 15467, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:32:08.841973', 'step': 15467, 'epoch': 3} {'type': 'loss', 'content': 0.09663821756839752, 'timestamp': '2025-10-01 04:32:08.865607', 'step': 15468, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:08.905290', 'step': 15468, 'epoch': 3} {'type': 'loss', 'content': 0.08102119714021683, 'timestamp': '2025-10-01 04:32:08.907433', 'step': 15469, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:08.937750', 'step': 15469, 'epoch': 3} {'type': 'loss', 'content': 0.05651281401515007, 'timestamp': '2025-10-01 04:32:08.940066', 'step': 15470, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:08.970424', 'step': 15470, 'epoch': 3} {'type': 'loss', 'content': 0.04910106956958771, 'timestamp': '2025-10-01 04:32:08.972624', 'step': 15471, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:09.003344', 'step': 15471, 'epoch': 3} {'type': 'loss', 'content': 0.070638507604599, 'timestamp': '2025-10-01 04:32:09.027045', 'step': 15472, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:09.057925', 'step': 15472, 'epoch': 3} {'type': 'loss', 'content': 0.058329422026872635, 'timestamp': '2025-10-01 04:32:09.060066', 'step': 15473, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:09.091667', 'step': 15473, 'epoch': 3} {'type': 'loss', 'content': 0.037356939166784286, 'timestamp': '2025-10-01 04:32:09.094135', 'step': 15474, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:09.124659', 'step': 15474, 'epoch': 3} {'type': 'loss', 'content': 0.08764457702636719, 'timestamp': '2025-10-01 04:32:09.128022', 'step': 15475, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:09.158868', 'step': 15475, 'epoch': 3} {'type': 'loss', 'content': 0.06296748667955399, 'timestamp': '2025-10-01 04:32:09.182560', 'step': 15476, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:09.215923', 'step': 15476, 'epoch': 3} {'type': 'loss', 'content': 0.03313114494085312, 'timestamp': '2025-10-01 04:32:09.218080', 'step': 15477, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:09.247951', 'step': 15477, 'epoch': 3} {'type': 'loss', 'content': 0.035181425511837006, 'timestamp': '2025-10-01 04:32:09.250404', 'step': 15478, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:09.285194', 'step': 15478, 'epoch': 3} {'type': 'loss', 'content': 0.07208207249641418, 'timestamp': '2025-10-01 04:32:09.287335', 'step': 15479, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:09.318513', 'step': 15479, 'epoch': 3} {'type': 'loss', 'content': 0.05326039344072342, 'timestamp': '2025-10-01 04:32:09.342427', 'step': 15480, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:09.379117', 'step': 15480, 'epoch': 3} {'type': 'loss', 'content': 0.09126047790050507, 'timestamp': '2025-10-01 04:32:09.381383', 'step': 15481, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:09.412713', 'step': 15481, 'epoch': 3} {'type': 'loss', 'content': 0.07584349811077118, 'timestamp': '2025-10-01 04:32:09.414936', 'step': 15482, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:32:09.446586', 'step': 15482, 'epoch': 3} {'type': 'loss', 'content': 0.07833129912614822, 'timestamp': '2025-10-01 04:32:09.448923', 'step': 15483, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:09.479205', 'step': 15483, 'epoch': 3} {'type': 'loss', 'content': 0.14849570393562317, 'timestamp': '2025-10-01 04:32:09.502839', 'step': 15484, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-10-01 04:32:09.533984', 'step': 15484, 'epoch': 3} {'type': 'loss', 'content': 0.09280581772327423, 'timestamp': '2025-10-01 04:32:09.536844', 'step': 15485, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:09.569690', 'step': 15485, 'epoch': 3} {'type': 'loss', 'content': 0.06253416836261749, 'timestamp': '2025-10-01 04:32:09.571954', 'step': 15486, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:09.611160', 'step': 15486, 'epoch': 3} {'type': 'loss', 'content': 0.08305878937244415, 'timestamp': '2025-10-01 04:32:09.613466', 'step': 15487, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:09.644406', 'step': 15487, 'epoch': 3} {'type': 'loss', 'content': 0.07169024646282196, 'timestamp': '2025-10-01 04:32:09.668879', 'step': 15488, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:09.698919', 'step': 15488, 'epoch': 3} {'type': 'loss', 'content': 0.05713315308094025, 'timestamp': '2025-10-01 04:32:09.701632', 'step': 15489, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:09.736269', 'step': 15489, 'epoch': 3} {'type': 'loss', 'content': 0.11153320968151093, 'timestamp': '2025-10-01 04:32:09.742526', 'step': 15490, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:09.772353', 'step': 15490, 'epoch': 3} {'type': 'loss', 'content': 0.06095742806792259, 'timestamp': '2025-10-01 04:32:09.774510', 'step': 15491, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:09.805283', 'step': 15491, 'epoch': 3} {'type': 'loss', 'content': 0.02389848604798317, 'timestamp': '2025-10-01 04:32:09.828866', 'step': 15492, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:09.878276', 'step': 15492, 'epoch': 3} {'type': 'loss', 'content': 0.05554397404193878, 'timestamp': '2025-10-01 04:32:09.880757', 'step': 15493, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:32:09.932021', 'step': 15493, 'epoch': 3} {'type': 'loss', 'content': 0.1825019121170044, 'timestamp': '2025-10-01 04:32:09.934492', 'step': 15494, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:09.968350', 'step': 15494, 'epoch': 3} {'type': 'loss', 'content': 0.02449856512248516, 'timestamp': '2025-10-01 04:32:09.970664', 'step': 15495, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:10.013432', 'step': 15495, 'epoch': 3} {'type': 'loss', 'content': 0.05123145878314972, 'timestamp': '2025-10-01 04:32:10.037209', 'step': 15496, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:10.074619', 'step': 15496, 'epoch': 3} {'type': 'loss', 'content': 0.02323060669004917, 'timestamp': '2025-10-01 04:32:10.076805', 'step': 15497, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:10.119736', 'step': 15497, 'epoch': 3} {'type': 'loss', 'content': 0.10807458311319351, 'timestamp': '2025-10-01 04:32:10.136775', 'step': 15498, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:10.181276', 'step': 15498, 'epoch': 3} {'type': 'loss', 'content': 0.127865269780159, 'timestamp': '2025-10-01 04:32:10.183595', 'step': 15499, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:32:10.216178', 'step': 15499, 'epoch': 3} {'type': 'loss', 'content': 0.13567595183849335, 'timestamp': '2025-10-01 04:32:10.239981', 'step': 15500, 'epoch': 3} {'type': 'info', 'content': 'Checkpoint saved at step 15500', 'timestamp': '2025-10-01 04:32:15.678411', 'step': 15500, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:15.723455', 'step': 15500, 'epoch': 3} {'type': 'loss', 'content': 0.045890990644693375, 'timestamp': '2025-10-01 04:32:15.726092', 'step': 15501, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:15.756946', 'step': 15501, 'epoch': 3} {'type': 'loss', 'content': 0.04975784942507744, 'timestamp': '2025-10-01 04:32:15.759121', 'step': 15502, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:15.791276', 'step': 15502, 'epoch': 3} {'type': 'loss', 'content': 0.06573144346475601, 'timestamp': '2025-10-01 04:32:15.793563', 'step': 15503, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:15.828024', 'step': 15503, 'epoch': 3} {'type': 'loss', 'content': 0.1297016739845276, 'timestamp': '2025-10-01 04:32:15.851916', 'step': 15504, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:32:15.883051', 'step': 15504, 'epoch': 3} {'type': 'loss', 'content': 0.07300158590078354, 'timestamp': '2025-10-01 04:32:15.885226', 'step': 15505, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-10-01 04:32:15.917175', 'step': 15505, 'epoch': 3} {'type': 'loss', 'content': 0.05539517477154732, 'timestamp': '2025-10-01 04:32:15.924396', 'step': 15506, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:15.957045', 'step': 15506, 'epoch': 3} {'type': 'loss', 'content': 0.06436242163181305, 'timestamp': '2025-10-01 04:32:15.959149', 'step': 15507, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:15.990587', 'step': 15507, 'epoch': 3} {'type': 'loss', 'content': 0.15815512835979462, 'timestamp': '2025-10-01 04:32:16.015010', 'step': 15508, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:16.046004', 'step': 15508, 'epoch': 3} {'type': 'loss', 'content': 0.14792397618293762, 'timestamp': '2025-10-01 04:32:16.048478', 'step': 15509, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:16.081603', 'step': 15509, 'epoch': 3} {'type': 'loss', 'content': 0.05969899147748947, 'timestamp': '2025-10-01 04:32:16.083845', 'step': 15510, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:32:16.113992', 'step': 15510, 'epoch': 3} {'type': 'loss', 'content': 0.07815466821193695, 'timestamp': '2025-10-01 04:32:16.116871', 'step': 15511, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-10-01 04:32:16.147996', 'step': 15511, 'epoch': 3} {'type': 'loss', 'content': 0.11302052438259125, 'timestamp': '2025-10-01 04:32:16.176490', 'step': 15512, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:16.212083', 'step': 15512, 'epoch': 3} {'type': 'loss', 'content': 0.07474060356616974, 'timestamp': '2025-10-01 04:32:16.214199', 'step': 15513, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:16.244244', 'step': 15513, 'epoch': 3} {'type': 'loss', 'content': 0.0650845393538475, 'timestamp': '2025-10-01 04:32:16.246357', 'step': 15514, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:16.276671', 'step': 15514, 'epoch': 3} {'type': 'loss', 'content': 0.0779402107000351, 'timestamp': '2025-10-01 04:32:16.278794', 'step': 15515, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:16.309008', 'step': 15515, 'epoch': 3} {'type': 'loss', 'content': 0.11553152650594711, 'timestamp': '2025-10-01 04:32:16.332500', 'step': 15516, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:32:16.362866', 'step': 15516, 'epoch': 3} {'type': 'loss', 'content': 0.048431627452373505, 'timestamp': '2025-10-01 04:32:16.365007', 'step': 15517, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:16.397057', 'step': 15517, 'epoch': 3} {'type': 'loss', 'content': 0.08457621932029724, 'timestamp': '2025-10-01 04:32:16.399293', 'step': 15518, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:16.429962', 'step': 15518, 'epoch': 3} {'type': 'loss', 'content': 0.05323413386940956, 'timestamp': '2025-10-01 04:32:16.432135', 'step': 15519, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:32:16.462819', 'step': 15519, 'epoch': 3} {'type': 'loss', 'content': 0.053299445658922195, 'timestamp': '2025-10-01 04:32:16.488228', 'step': 15520, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:16.519401', 'step': 15520, 'epoch': 3} {'type': 'loss', 'content': 0.03895095735788345, 'timestamp': '2025-10-01 04:32:16.521563', 'step': 15521, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:32:16.552350', 'step': 15521, 'epoch': 3} {'type': 'loss', 'content': 0.15278016030788422, 'timestamp': '2025-10-01 04:32:16.555409', 'step': 15522, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:16.585731', 'step': 15522, 'epoch': 3} {'type': 'loss', 'content': 0.07980853319168091, 'timestamp': '2025-10-01 04:32:16.587999', 'step': 15523, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:16.619441', 'step': 15523, 'epoch': 3} {'type': 'loss', 'content': 0.08456635475158691, 'timestamp': '2025-10-01 04:32:16.643177', 'step': 15524, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:32:16.674228', 'step': 15524, 'epoch': 3} {'type': 'loss', 'content': 0.0903729498386383, 'timestamp': '2025-10-01 04:32:16.676307', 'step': 15525, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:16.707209', 'step': 15525, 'epoch': 3} {'type': 'loss', 'content': 0.05855692923069, 'timestamp': '2025-10-01 04:32:16.710544', 'step': 15526, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:16.742530', 'step': 15526, 'epoch': 3} {'type': 'loss', 'content': 0.027219071984291077, 'timestamp': '2025-10-01 04:32:16.744775', 'step': 15527, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:16.776271', 'step': 15527, 'epoch': 3} {'type': 'loss', 'content': 0.07288084924221039, 'timestamp': '2025-10-01 04:32:16.801681', 'step': 15528, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:16.832062', 'step': 15528, 'epoch': 3} {'type': 'loss', 'content': 0.10152671486139297, 'timestamp': '2025-10-01 04:32:16.834220', 'step': 15529, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:16.864818', 'step': 15529, 'epoch': 3} {'type': 'loss', 'content': 0.051880866289138794, 'timestamp': '2025-10-01 04:32:16.866935', 'step': 15530, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:16.899796', 'step': 15530, 'epoch': 3} {'type': 'loss', 'content': 0.0738489106297493, 'timestamp': '2025-10-01 04:32:16.901900', 'step': 15531, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:16.932447', 'step': 15531, 'epoch': 3} {'type': 'loss', 'content': 0.07910972833633423, 'timestamp': '2025-10-01 04:32:16.956180', 'step': 15532, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:16.986548', 'step': 15532, 'epoch': 3} {'type': 'loss', 'content': 0.05964493378996849, 'timestamp': '2025-10-01 04:32:16.988742', 'step': 15533, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:17.018454', 'step': 15533, 'epoch': 3} {'type': 'loss', 'content': 0.0626293495297432, 'timestamp': '2025-10-01 04:32:17.020654', 'step': 15534, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:17.050965', 'step': 15534, 'epoch': 3} {'type': 'loss', 'content': 0.13024108111858368, 'timestamp': '2025-10-01 04:32:17.053037', 'step': 15535, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:17.084161', 'step': 15535, 'epoch': 3} {'type': 'loss', 'content': 0.11713334918022156, 'timestamp': '2025-10-01 04:32:17.114424', 'step': 15536, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:17.144506', 'step': 15536, 'epoch': 3} {'type': 'loss', 'content': 0.09676949679851532, 'timestamp': '2025-10-01 04:32:17.147108', 'step': 15537, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:17.180155', 'step': 15537, 'epoch': 3} {'type': 'loss', 'content': 0.07180288434028625, 'timestamp': '2025-10-01 04:32:17.183080', 'step': 15538, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:17.214158', 'step': 15538, 'epoch': 3} {'type': 'loss', 'content': 0.13742221891880035, 'timestamp': '2025-10-01 04:32:17.216240', 'step': 15539, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:32:17.247241', 'step': 15539, 'epoch': 3} {'type': 'loss', 'content': 0.11536096036434174, 'timestamp': '2025-10-01 04:32:17.271183', 'step': 15540, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:17.308275', 'step': 15540, 'epoch': 3} {'type': 'loss', 'content': 0.08055958151817322, 'timestamp': '2025-10-01 04:32:17.310362', 'step': 15541, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:32:17.341287', 'step': 15541, 'epoch': 3} {'type': 'loss', 'content': 0.09009493887424469, 'timestamp': '2025-10-01 04:32:17.343744', 'step': 15542, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:17.381289', 'step': 15542, 'epoch': 3} {'type': 'loss', 'content': 0.10846421867609024, 'timestamp': '2025-10-01 04:32:17.383361', 'step': 15543, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:17.422452', 'step': 15543, 'epoch': 3} {'type': 'loss', 'content': 0.06419730931520462, 'timestamp': '2025-10-01 04:32:17.446396', 'step': 15544, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:17.477763', 'step': 15544, 'epoch': 3} {'type': 'loss', 'content': 0.04116557165980339, 'timestamp': '2025-10-01 04:32:17.479907', 'step': 15545, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:17.511248', 'step': 15545, 'epoch': 3} {'type': 'loss', 'content': 0.1134241446852684, 'timestamp': '2025-10-01 04:32:17.513389', 'step': 15546, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:17.544583', 'step': 15546, 'epoch': 3} {'type': 'loss', 'content': 0.17729398608207703, 'timestamp': '2025-10-01 04:32:17.546758', 'step': 15547, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:32:17.577033', 'step': 15547, 'epoch': 3} {'type': 'loss', 'content': 0.07149334251880646, 'timestamp': '2025-10-01 04:32:17.600685', 'step': 15548, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:17.631196', 'step': 15548, 'epoch': 3} {'type': 'loss', 'content': 0.1322505921125412, 'timestamp': '2025-10-01 04:32:17.633286', 'step': 15549, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:17.663698', 'step': 15549, 'epoch': 3} {'type': 'loss', 'content': 0.04043462127447128, 'timestamp': '2025-10-01 04:32:17.667376', 'step': 15550, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:17.705778', 'step': 15550, 'epoch': 3} {'type': 'loss', 'content': 0.06423834711313248, 'timestamp': '2025-10-01 04:32:17.708147', 'step': 15551, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:17.743184', 'step': 15551, 'epoch': 3} {'type': 'loss', 'content': 0.05902308598160744, 'timestamp': '2025-10-01 04:32:17.766943', 'step': 15552, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:17.797517', 'step': 15552, 'epoch': 3} {'type': 'loss', 'content': 0.11105066537857056, 'timestamp': '2025-10-01 04:32:17.799971', 'step': 15553, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:17.830748', 'step': 15553, 'epoch': 3} {'type': 'loss', 'content': 0.11459247767925262, 'timestamp': '2025-10-01 04:32:17.832810', 'step': 15554, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:32:17.864699', 'step': 15554, 'epoch': 3} {'type': 'loss', 'content': 0.04434451833367348, 'timestamp': '2025-10-01 04:32:17.867204', 'step': 15555, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:17.898497', 'step': 15555, 'epoch': 3} {'type': 'loss', 'content': 0.0668019950389862, 'timestamp': '2025-10-01 04:32:17.922084', 'step': 15556, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:17.952757', 'step': 15556, 'epoch': 3} {'type': 'loss', 'content': 0.060811880975961685, 'timestamp': '2025-10-01 04:32:17.954839', 'step': 15557, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:17.988656', 'step': 15557, 'epoch': 3} {'type': 'loss', 'content': 0.08448928594589233, 'timestamp': '2025-10-01 04:32:17.990870', 'step': 15558, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:18.021621', 'step': 15558, 'epoch': 3} {'type': 'loss', 'content': 0.1453792154788971, 'timestamp': '2025-10-01 04:32:18.023872', 'step': 15559, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:32:18.054781', 'step': 15559, 'epoch': 3} {'type': 'loss', 'content': 0.054724689573049545, 'timestamp': '2025-10-01 04:32:18.078216', 'step': 15560, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:18.109025', 'step': 15560, 'epoch': 3} {'type': 'loss', 'content': 0.04178016260266304, 'timestamp': '2025-10-01 04:32:18.111116', 'step': 15561, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:18.141403', 'step': 15561, 'epoch': 3} {'type': 'loss', 'content': 0.008087092079222202, 'timestamp': '2025-10-01 04:32:18.143464', 'step': 15562, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:18.174117', 'step': 15562, 'epoch': 3} {'type': 'loss', 'content': 0.09017763286828995, 'timestamp': '2025-10-01 04:32:18.176253', 'step': 15563, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:18.206624', 'step': 15563, 'epoch': 3} {'type': 'loss', 'content': 0.10061242431402206, 'timestamp': '2025-10-01 04:32:18.230318', 'step': 15564, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:18.260669', 'step': 15564, 'epoch': 3} {'type': 'loss', 'content': 0.08089745044708252, 'timestamp': '2025-10-01 04:32:18.262632', 'step': 15565, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:18.308382', 'step': 15565, 'epoch': 3} {'type': 'loss', 'content': 0.055716272443532944, 'timestamp': '2025-10-01 04:32:18.310445', 'step': 15566, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:18.340217', 'step': 15566, 'epoch': 3} {'type': 'loss', 'content': 0.1350618451833725, 'timestamp': '2025-10-01 04:32:18.342460', 'step': 15567, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:18.380428', 'step': 15567, 'epoch': 3} {'type': 'loss', 'content': 0.12658141553401947, 'timestamp': '2025-10-01 04:32:18.404115', 'step': 15568, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:18.434302', 'step': 15568, 'epoch': 3} {'type': 'loss', 'content': 0.13623760640621185, 'timestamp': '2025-10-01 04:32:18.438212', 'step': 15569, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:18.468359', 'step': 15569, 'epoch': 3} {'type': 'loss', 'content': 0.08468416333198547, 'timestamp': '2025-10-01 04:32:18.470528', 'step': 15570, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:32:18.506745', 'step': 15570, 'epoch': 3} {'type': 'loss', 'content': 0.12078380584716797, 'timestamp': '2025-10-01 04:32:18.509623', 'step': 15571, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:18.540126', 'step': 15571, 'epoch': 3} {'type': 'loss', 'content': 0.0332501083612442, 'timestamp': '2025-10-01 04:32:18.563760', 'step': 15572, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:32:18.594469', 'step': 15572, 'epoch': 3} {'type': 'loss', 'content': 0.11809258162975311, 'timestamp': '2025-10-01 04:32:18.596637', 'step': 15573, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:18.628562', 'step': 15573, 'epoch': 3} {'type': 'loss', 'content': 0.08314887434244156, 'timestamp': '2025-10-01 04:32:18.630693', 'step': 15574, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:32:18.662967', 'step': 15574, 'epoch': 3} {'type': 'loss', 'content': 0.14802490174770355, 'timestamp': '2025-10-01 04:32:18.665179', 'step': 15575, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:18.700236', 'step': 15575, 'epoch': 3} {'type': 'loss', 'content': 0.05475974828004837, 'timestamp': '2025-10-01 04:32:18.724039', 'step': 15576, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:18.754310', 'step': 15576, 'epoch': 3} {'type': 'loss', 'content': 0.05552368238568306, 'timestamp': '2025-10-01 04:32:18.756507', 'step': 15577, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:18.787442', 'step': 15577, 'epoch': 3} {'type': 'loss', 'content': 0.07702368497848511, 'timestamp': '2025-10-01 04:32:18.789533', 'step': 15578, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:18.820299', 'step': 15578, 'epoch': 3} {'type': 'loss', 'content': 0.062360286712646484, 'timestamp': '2025-10-01 04:32:18.822482', 'step': 15579, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:18.853996', 'step': 15579, 'epoch': 3} {'type': 'loss', 'content': 0.07189779728651047, 'timestamp': '2025-10-01 04:32:18.877812', 'step': 15580, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:18.908589', 'step': 15580, 'epoch': 3} {'type': 'loss', 'content': 0.1844640076160431, 'timestamp': '2025-10-01 04:32:18.910848', 'step': 15581, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:18.941470', 'step': 15581, 'epoch': 3} {'type': 'loss', 'content': 0.11057107150554657, 'timestamp': '2025-10-01 04:32:18.943766', 'step': 15582, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:18.976435', 'step': 15582, 'epoch': 3} {'type': 'loss', 'content': 0.14422528445720673, 'timestamp': '2025-10-01 04:32:18.979377', 'step': 15583, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:19.010498', 'step': 15583, 'epoch': 3} {'type': 'loss', 'content': 0.06872483342885971, 'timestamp': '2025-10-01 04:32:19.034268', 'step': 15584, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:19.065352', 'step': 15584, 'epoch': 3} {'type': 'loss', 'content': 0.05221925303339958, 'timestamp': '2025-10-01 04:32:19.067589', 'step': 15585, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:19.098478', 'step': 15585, 'epoch': 3} {'type': 'loss', 'content': 0.07513123750686646, 'timestamp': '2025-10-01 04:32:19.103056', 'step': 15586, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:32:19.143406', 'step': 15586, 'epoch': 3} {'type': 'loss', 'content': 0.13046777248382568, 'timestamp': '2025-10-01 04:32:19.149281', 'step': 15587, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:19.185684', 'step': 15587, 'epoch': 3} {'type': 'loss', 'content': 0.08278648555278778, 'timestamp': '2025-10-01 04:32:19.209479', 'step': 15588, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:19.239827', 'step': 15588, 'epoch': 3} {'type': 'loss', 'content': 0.16800372302532196, 'timestamp': '2025-10-01 04:32:19.242052', 'step': 15589, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:32:19.273566', 'step': 15589, 'epoch': 3} {'type': 'loss', 'content': 0.06912872195243835, 'timestamp': '2025-10-01 04:32:19.278006', 'step': 15590, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:19.309083', 'step': 15590, 'epoch': 3} {'type': 'loss', 'content': 0.09547334164381027, 'timestamp': '2025-10-01 04:32:19.311234', 'step': 15591, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:19.341870', 'step': 15591, 'epoch': 3} {'type': 'loss', 'content': 0.056919798254966736, 'timestamp': '2025-10-01 04:32:19.365461', 'step': 15592, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:19.395861', 'step': 15592, 'epoch': 3} {'type': 'loss', 'content': 0.05693082511425018, 'timestamp': '2025-10-01 04:32:19.397922', 'step': 15593, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:19.428453', 'step': 15593, 'epoch': 3} {'type': 'loss', 'content': 0.08341649174690247, 'timestamp': '2025-10-01 04:32:19.430738', 'step': 15594, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:19.461401', 'step': 15594, 'epoch': 3} {'type': 'loss', 'content': 0.10052571445703506, 'timestamp': '2025-10-01 04:32:19.463792', 'step': 15595, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:32:19.494516', 'step': 15595, 'epoch': 3} {'type': 'loss', 'content': 0.015587135218083858, 'timestamp': '2025-10-01 04:32:19.518145', 'step': 15596, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:19.548731', 'step': 15596, 'epoch': 3} {'type': 'loss', 'content': 0.08148790895938873, 'timestamp': '2025-10-01 04:32:19.550875', 'step': 15597, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:19.581343', 'step': 15597, 'epoch': 3} {'type': 'loss', 'content': 0.05119941011071205, 'timestamp': '2025-10-01 04:32:19.586742', 'step': 15598, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:19.621103', 'step': 15598, 'epoch': 3} {'type': 'loss', 'content': 0.1238982304930687, 'timestamp': '2025-10-01 04:32:19.625602', 'step': 15599, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:32:19.667280', 'step': 15599, 'epoch': 3} {'type': 'loss', 'content': 0.06688693910837173, 'timestamp': '2025-10-01 04:32:19.690696', 'step': 15600, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:19.721138', 'step': 15600, 'epoch': 3} {'type': 'loss', 'content': 0.07375311106443405, 'timestamp': '2025-10-01 04:32:19.723179', 'step': 15601, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:19.753433', 'step': 15601, 'epoch': 3} {'type': 'loss', 'content': 0.08519269526004791, 'timestamp': '2025-10-01 04:32:19.756405', 'step': 15602, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:19.789812', 'step': 15602, 'epoch': 3} {'type': 'loss', 'content': 0.06481534987688065, 'timestamp': '2025-10-01 04:32:19.792153', 'step': 15603, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:19.841315', 'step': 15603, 'epoch': 3} {'type': 'loss', 'content': 0.0503229945898056, 'timestamp': '2025-10-01 04:32:19.865048', 'step': 15604, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:19.898213', 'step': 15604, 'epoch': 3} {'type': 'loss', 'content': 0.11232492327690125, 'timestamp': '2025-10-01 04:32:19.900306', 'step': 15605, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:19.930919', 'step': 15605, 'epoch': 3} {'type': 'loss', 'content': 0.0916910395026207, 'timestamp': '2025-10-01 04:32:19.936288', 'step': 15606, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:19.967013', 'step': 15606, 'epoch': 3} {'type': 'loss', 'content': 0.030814049765467644, 'timestamp': '2025-10-01 04:32:19.969157', 'step': 15607, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:19.999868', 'step': 15607, 'epoch': 3} {'type': 'loss', 'content': 0.09590466320514679, 'timestamp': '2025-10-01 04:32:20.023474', 'step': 15608, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:20.054657', 'step': 15608, 'epoch': 3} {'type': 'loss', 'content': 0.09316277503967285, 'timestamp': '2025-10-01 04:32:20.057375', 'step': 15609, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:32:20.088575', 'step': 15609, 'epoch': 3} {'type': 'loss', 'content': 0.08102205395698547, 'timestamp': '2025-10-01 04:32:20.090975', 'step': 15610, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:20.129603', 'step': 15610, 'epoch': 3} {'type': 'loss', 'content': 0.08073954284191132, 'timestamp': '2025-10-01 04:32:20.131727', 'step': 15611, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:20.163930', 'step': 15611, 'epoch': 3} {'type': 'loss', 'content': 0.10188071429729462, 'timestamp': '2025-10-01 04:32:20.187573', 'step': 15612, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:20.224186', 'step': 15612, 'epoch': 3} {'type': 'loss', 'content': 0.08989554643630981, 'timestamp': '2025-10-01 04:32:20.226306', 'step': 15613, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:20.256579', 'step': 15613, 'epoch': 3} {'type': 'loss', 'content': 0.17974360287189484, 'timestamp': '2025-10-01 04:32:20.258681', 'step': 15614, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:20.290504', 'step': 15614, 'epoch': 3} {'type': 'loss', 'content': 0.1100374162197113, 'timestamp': '2025-10-01 04:32:20.292530', 'step': 15615, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:20.336224', 'step': 15615, 'epoch': 3} {'type': 'loss', 'content': 0.07119289040565491, 'timestamp': '2025-10-01 04:32:20.368253', 'step': 15616, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:20.400552', 'step': 15616, 'epoch': 3} {'type': 'loss', 'content': 0.12482918798923492, 'timestamp': '2025-10-01 04:32:20.402955', 'step': 15617, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:32:20.433650', 'step': 15617, 'epoch': 3} {'type': 'loss', 'content': 0.04911857470870018, 'timestamp': '2025-10-01 04:32:20.436048', 'step': 15618, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:20.466607', 'step': 15618, 'epoch': 3} {'type': 'loss', 'content': 0.07013014703989029, 'timestamp': '2025-10-01 04:32:20.469184', 'step': 15619, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:20.505539', 'step': 15619, 'epoch': 3} {'type': 'loss', 'content': 0.04165731742978096, 'timestamp': '2025-10-01 04:32:20.529194', 'step': 15620, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:20.571783', 'step': 15620, 'epoch': 3} {'type': 'loss', 'content': 0.06528210639953613, 'timestamp': '2025-10-01 04:32:20.574506', 'step': 15621, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:20.612140', 'step': 15621, 'epoch': 3} {'type': 'loss', 'content': 0.0168104600161314, 'timestamp': '2025-10-01 04:32:20.614998', 'step': 15622, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:20.644963', 'step': 15622, 'epoch': 3} {'type': 'loss', 'content': 0.07241195440292358, 'timestamp': '2025-10-01 04:32:20.647242', 'step': 15623, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:20.677987', 'step': 15623, 'epoch': 3} {'type': 'loss', 'content': 0.18963098526000977, 'timestamp': '2025-10-01 04:32:20.701729', 'step': 15624, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:20.732304', 'step': 15624, 'epoch': 3} {'type': 'loss', 'content': 0.08976160734891891, 'timestamp': '2025-10-01 04:32:20.735132', 'step': 15625, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:20.765625', 'step': 15625, 'epoch': 3} {'type': 'loss', 'content': 0.05723501369357109, 'timestamp': '2025-10-01 04:32:20.769138', 'step': 15626, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:32:20.800270', 'step': 15626, 'epoch': 3} {'type': 'loss', 'content': 0.15101997554302216, 'timestamp': '2025-10-01 04:32:20.802752', 'step': 15627, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:20.833756', 'step': 15627, 'epoch': 3} {'type': 'loss', 'content': 0.03249693289399147, 'timestamp': '2025-10-01 04:32:20.858166', 'step': 15628, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:20.889628', 'step': 15628, 'epoch': 3} {'type': 'loss', 'content': 0.06509027630090714, 'timestamp': '2025-10-01 04:32:20.891969', 'step': 15629, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:20.923153', 'step': 15629, 'epoch': 3} {'type': 'loss', 'content': 0.07542532682418823, 'timestamp': '2025-10-01 04:32:20.925689', 'step': 15630, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:20.958805', 'step': 15630, 'epoch': 3} {'type': 'loss', 'content': 0.09450691938400269, 'timestamp': '2025-10-01 04:32:20.961141', 'step': 15631, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:20.991816', 'step': 15631, 'epoch': 3} {'type': 'loss', 'content': 0.08352155983448029, 'timestamp': '2025-10-01 04:32:21.015433', 'step': 15632, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:32:21.045557', 'step': 15632, 'epoch': 3} {'type': 'loss', 'content': 0.0501437783241272, 'timestamp': '2025-10-01 04:32:21.047838', 'step': 15633, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:21.078341', 'step': 15633, 'epoch': 3} {'type': 'loss', 'content': 0.09675030410289764, 'timestamp': '2025-10-01 04:32:21.081058', 'step': 15634, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:21.111957', 'step': 15634, 'epoch': 3} {'type': 'loss', 'content': 0.04238775372505188, 'timestamp': '2025-10-01 04:32:21.114217', 'step': 15635, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:21.145081', 'step': 15635, 'epoch': 3} {'type': 'loss', 'content': 0.06972253322601318, 'timestamp': '2025-10-01 04:32:21.168789', 'step': 15636, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:32:21.202047', 'step': 15636, 'epoch': 3} {'type': 'loss', 'content': 0.023272506892681122, 'timestamp': '2025-10-01 04:32:21.204102', 'step': 15637, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:21.235845', 'step': 15637, 'epoch': 3} {'type': 'loss', 'content': 0.1113651692867279, 'timestamp': '2025-10-01 04:32:21.238560', 'step': 15638, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:21.271230', 'step': 15638, 'epoch': 3} {'type': 'loss', 'content': 0.10491935163736343, 'timestamp': '2025-10-01 04:32:21.284386', 'step': 15639, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:21.315485', 'step': 15639, 'epoch': 3} {'type': 'loss', 'content': 0.0335538350045681, 'timestamp': '2025-10-01 04:32:21.339386', 'step': 15640, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:21.371103', 'step': 15640, 'epoch': 3} {'type': 'loss', 'content': 0.050997085869312286, 'timestamp': '2025-10-01 04:32:21.373341', 'step': 15641, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:21.406348', 'step': 15641, 'epoch': 3} {'type': 'loss', 'content': 0.08413384109735489, 'timestamp': '2025-10-01 04:32:21.408566', 'step': 15642, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:21.438456', 'step': 15642, 'epoch': 3} {'type': 'loss', 'content': 0.1342662125825882, 'timestamp': '2025-10-01 04:32:21.441256', 'step': 15643, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:32:21.471662', 'step': 15643, 'epoch': 3} {'type': 'loss', 'content': 0.018095294013619423, 'timestamp': '2025-10-01 04:32:21.495300', 'step': 15644, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:21.527465', 'step': 15644, 'epoch': 3} {'type': 'loss', 'content': 0.07579640299081802, 'timestamp': '2025-10-01 04:32:21.529631', 'step': 15645, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:21.559648', 'step': 15645, 'epoch': 3} {'type': 'loss', 'content': 0.08187569677829742, 'timestamp': '2025-10-01 04:32:21.562130', 'step': 15646, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:21.594479', 'step': 15646, 'epoch': 3} {'type': 'loss', 'content': 0.07291973382234573, 'timestamp': '2025-10-01 04:32:21.596746', 'step': 15647, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:21.627600', 'step': 15647, 'epoch': 3} {'type': 'loss', 'content': 0.047997716814279556, 'timestamp': '2025-10-01 04:32:21.652436', 'step': 15648, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:21.682966', 'step': 15648, 'epoch': 3} {'type': 'loss', 'content': 0.08443064242601395, 'timestamp': '2025-10-01 04:32:21.685071', 'step': 15649, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:21.715214', 'step': 15649, 'epoch': 3} {'type': 'loss', 'content': 0.10589811950922012, 'timestamp': '2025-10-01 04:32:21.717469', 'step': 15650, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:21.748069', 'step': 15650, 'epoch': 3} {'type': 'loss', 'content': 0.05671265721321106, 'timestamp': '2025-10-01 04:32:21.750232', 'step': 15651, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:21.780731', 'step': 15651, 'epoch': 3} {'type': 'loss', 'content': 0.06030423194169998, 'timestamp': '2025-10-01 04:32:21.805628', 'step': 15652, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:21.837107', 'step': 15652, 'epoch': 3} {'type': 'loss', 'content': 0.09624399989843369, 'timestamp': '2025-10-01 04:32:21.839580', 'step': 15653, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:21.873029', 'step': 15653, 'epoch': 3} {'type': 'loss', 'content': 0.10003853589296341, 'timestamp': '2025-10-01 04:32:21.875354', 'step': 15654, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:21.906293', 'step': 15654, 'epoch': 3} {'type': 'loss', 'content': 0.16668052971363068, 'timestamp': '2025-10-01 04:32:21.909528', 'step': 15655, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:32:21.942705', 'step': 15655, 'epoch': 3} {'type': 'loss', 'content': 0.02750549092888832, 'timestamp': '2025-10-01 04:32:21.966396', 'step': 15656, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:32:21.996336', 'step': 15656, 'epoch': 3} {'type': 'loss', 'content': 0.07973705232143402, 'timestamp': '2025-10-01 04:32:21.998871', 'step': 15657, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:22.030446', 'step': 15657, 'epoch': 3} {'type': 'loss', 'content': 0.09893519431352615, 'timestamp': '2025-10-01 04:32:22.032670', 'step': 15658, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:22.063688', 'step': 15658, 'epoch': 3} {'type': 'loss', 'content': 0.1270361989736557, 'timestamp': '2025-10-01 04:32:22.066191', 'step': 15659, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:22.096721', 'step': 15659, 'epoch': 3} {'type': 'loss', 'content': 0.0401989221572876, 'timestamp': '2025-10-01 04:32:22.120791', 'step': 15660, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:22.153649', 'step': 15660, 'epoch': 3} {'type': 'loss', 'content': 0.1220165491104126, 'timestamp': '2025-10-01 04:32:22.156069', 'step': 15661, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:22.187408', 'step': 15661, 'epoch': 3} {'type': 'loss', 'content': 0.03755129873752594, 'timestamp': '2025-10-01 04:32:22.189768', 'step': 15662, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:22.220497', 'step': 15662, 'epoch': 3} {'type': 'loss', 'content': 0.17488621175289154, 'timestamp': '2025-10-01 04:32:22.222770', 'step': 15663, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:22.254762', 'step': 15663, 'epoch': 3} {'type': 'loss', 'content': 0.11127813905477524, 'timestamp': '2025-10-01 04:32:22.278677', 'step': 15664, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:22.315059', 'step': 15664, 'epoch': 3} {'type': 'loss', 'content': 0.04830988869071007, 'timestamp': '2025-10-01 04:32:22.317515', 'step': 15665, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:22.349178', 'step': 15665, 'epoch': 3} {'type': 'loss', 'content': 0.05193180963397026, 'timestamp': '2025-10-01 04:32:22.351783', 'step': 15666, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:32:22.385554', 'step': 15666, 'epoch': 3} {'type': 'loss', 'content': 0.1374567449092865, 'timestamp': '2025-10-01 04:32:22.388063', 'step': 15667, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:22.423206', 'step': 15667, 'epoch': 3} {'type': 'loss', 'content': 0.11274706572294235, 'timestamp': '2025-10-01 04:32:22.447450', 'step': 15668, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:32:22.481744', 'step': 15668, 'epoch': 3} {'type': 'loss', 'content': 0.09151560813188553, 'timestamp': '2025-10-01 04:32:22.483849', 'step': 15669, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:22.519699', 'step': 15669, 'epoch': 3} {'type': 'loss', 'content': 0.08887731283903122, 'timestamp': '2025-10-01 04:32:22.522038', 'step': 15670, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:22.556333', 'step': 15670, 'epoch': 3} {'type': 'loss', 'content': 0.12197845429182053, 'timestamp': '2025-10-01 04:32:22.558765', 'step': 15671, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:22.598674', 'step': 15671, 'epoch': 3} {'type': 'loss', 'content': 0.121132992208004, 'timestamp': '2025-10-01 04:32:22.623503', 'step': 15672, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:22.654505', 'step': 15672, 'epoch': 3} {'type': 'loss', 'content': 0.09844378381967545, 'timestamp': '2025-10-01 04:32:22.656559', 'step': 15673, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:22.687078', 'step': 15673, 'epoch': 3} {'type': 'loss', 'content': 0.13621914386749268, 'timestamp': '2025-10-01 04:32:22.689986', 'step': 15674, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:22.723448', 'step': 15674, 'epoch': 3} {'type': 'loss', 'content': 0.05394616350531578, 'timestamp': '2025-10-01 04:32:22.725754', 'step': 15675, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:22.756315', 'step': 15675, 'epoch': 3} {'type': 'loss', 'content': 0.06790871918201447, 'timestamp': '2025-10-01 04:32:22.780120', 'step': 15676, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:22.813738', 'step': 15676, 'epoch': 3} {'type': 'loss', 'content': 0.07753846794366837, 'timestamp': '2025-10-01 04:32:22.816177', 'step': 15677, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:22.850285', 'step': 15677, 'epoch': 3} {'type': 'loss', 'content': 0.18921111524105072, 'timestamp': '2025-10-01 04:32:22.853776', 'step': 15678, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:22.895557', 'step': 15678, 'epoch': 3} {'type': 'loss', 'content': 0.028514651581645012, 'timestamp': '2025-10-01 04:32:22.899261', 'step': 15679, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:22.935332', 'step': 15679, 'epoch': 3} {'type': 'loss', 'content': 0.0355118103325367, 'timestamp': '2025-10-01 04:32:22.959359', 'step': 15680, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:22.992556', 'step': 15680, 'epoch': 3} {'type': 'loss', 'content': 0.07681246101856232, 'timestamp': '2025-10-01 04:32:22.997229', 'step': 15681, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:23.034030', 'step': 15681, 'epoch': 3} {'type': 'loss', 'content': 0.054977383464574814, 'timestamp': '2025-10-01 04:32:23.037879', 'step': 15682, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:23.071812', 'step': 15682, 'epoch': 3} {'type': 'loss', 'content': 0.04590475931763649, 'timestamp': '2025-10-01 04:32:23.076919', 'step': 15683, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:23.111145', 'step': 15683, 'epoch': 3} {'type': 'loss', 'content': 0.04151053726673126, 'timestamp': '2025-10-01 04:32:23.138544', 'step': 15684, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:23.174299', 'step': 15684, 'epoch': 3} {'type': 'loss', 'content': 0.14644065499305725, 'timestamp': '2025-10-01 04:32:23.183165', 'step': 15685, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:23.216003', 'step': 15685, 'epoch': 3} {'type': 'loss', 'content': 0.10864627361297607, 'timestamp': '2025-10-01 04:32:23.218351', 'step': 15686, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:32:23.250692', 'step': 15686, 'epoch': 3} {'type': 'loss', 'content': 0.039166409522295, 'timestamp': '2025-10-01 04:32:23.267175', 'step': 15687, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:23.314965', 'step': 15687, 'epoch': 3} {'type': 'loss', 'content': 0.14448261260986328, 'timestamp': '2025-10-01 04:32:23.339227', 'step': 15688, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:23.371160', 'step': 15688, 'epoch': 3} {'type': 'loss', 'content': 0.10709822922945023, 'timestamp': '2025-10-01 04:32:23.373219', 'step': 15689, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:23.403776', 'step': 15689, 'epoch': 3} {'type': 'loss', 'content': 0.04733844846487045, 'timestamp': '2025-10-01 04:32:23.406924', 'step': 15690, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:23.440553', 'step': 15690, 'epoch': 3} {'type': 'loss', 'content': 0.06489746272563934, 'timestamp': '2025-10-01 04:32:23.443397', 'step': 15691, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:32:23.479065', 'step': 15691, 'epoch': 3} {'type': 'loss', 'content': 0.017262887209653854, 'timestamp': '2025-10-01 04:32:23.504022', 'step': 15692, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:23.535688', 'step': 15692, 'epoch': 3} {'type': 'loss', 'content': 0.08861143887042999, 'timestamp': '2025-10-01 04:32:23.538601', 'step': 15693, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:23.571269', 'step': 15693, 'epoch': 3} {'type': 'loss', 'content': 0.07192075252532959, 'timestamp': '2025-10-01 04:32:23.573628', 'step': 15694, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:32:23.604601', 'step': 15694, 'epoch': 3} {'type': 'loss', 'content': 0.08947372436523438, 'timestamp': '2025-10-01 04:32:23.607365', 'step': 15695, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:23.638049', 'step': 15695, 'epoch': 3} {'type': 'loss', 'content': 0.056844111531972885, 'timestamp': '2025-10-01 04:32:23.661883', 'step': 15696, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:23.695232', 'step': 15696, 'epoch': 3} {'type': 'loss', 'content': 0.14782306551933289, 'timestamp': '2025-10-01 04:32:23.697818', 'step': 15697, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:32:23.728828', 'step': 15697, 'epoch': 3} {'type': 'loss', 'content': 0.015092194080352783, 'timestamp': '2025-10-01 04:32:23.731160', 'step': 15698, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:23.761119', 'step': 15698, 'epoch': 3} {'type': 'loss', 'content': 0.09587923437356949, 'timestamp': '2025-10-01 04:32:23.763696', 'step': 15699, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:23.794196', 'step': 15699, 'epoch': 3} {'type': 'loss', 'content': 0.10589989274740219, 'timestamp': '2025-10-01 04:32:23.819747', 'step': 15700, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:23.850439', 'step': 15700, 'epoch': 3} {'type': 'loss', 'content': 0.04053739458322525, 'timestamp': '2025-10-01 04:32:23.852870', 'step': 15701, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:32:23.885668', 'step': 15701, 'epoch': 3} {'type': 'loss', 'content': 0.07564977556467056, 'timestamp': '2025-10-01 04:32:23.888185', 'step': 15702, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:23.919378', 'step': 15702, 'epoch': 3} {'type': 'loss', 'content': 0.06906958669424057, 'timestamp': '2025-10-01 04:32:23.922198', 'step': 15703, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:32:23.953691', 'step': 15703, 'epoch': 3} {'type': 'loss', 'content': 0.0741407573223114, 'timestamp': '2025-10-01 04:32:23.977658', 'step': 15704, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:24.009165', 'step': 15704, 'epoch': 3} {'type': 'loss', 'content': 0.0698874369263649, 'timestamp': '2025-10-01 04:32:24.011433', 'step': 15705, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:24.042618', 'step': 15705, 'epoch': 3} {'type': 'loss', 'content': 0.12217389047145844, 'timestamp': '2025-10-01 04:32:24.044833', 'step': 15706, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:24.078407', 'step': 15706, 'epoch': 3} {'type': 'loss', 'content': 0.08548140525817871, 'timestamp': '2025-10-01 04:32:24.080681', 'step': 15707, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:24.111888', 'step': 15707, 'epoch': 3} {'type': 'loss', 'content': 0.048718392848968506, 'timestamp': '2025-10-01 04:32:24.135662', 'step': 15708, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:24.166388', 'step': 15708, 'epoch': 3} {'type': 'loss', 'content': 0.041932858526706696, 'timestamp': '2025-10-01 04:32:24.168567', 'step': 15709, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:32:24.208696', 'step': 15709, 'epoch': 3} {'type': 'loss', 'content': 0.1147579625248909, 'timestamp': '2025-10-01 04:32:24.211225', 'step': 15710, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:24.241912', 'step': 15710, 'epoch': 3} {'type': 'loss', 'content': 0.11681444942951202, 'timestamp': '2025-10-01 04:32:24.244529', 'step': 15711, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:24.275753', 'step': 15711, 'epoch': 3} {'type': 'loss', 'content': 0.08388339728116989, 'timestamp': '2025-10-01 04:32:24.299883', 'step': 15712, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:24.330007', 'step': 15712, 'epoch': 3} {'type': 'loss', 'content': 0.08429865539073944, 'timestamp': '2025-10-01 04:32:24.332988', 'step': 15713, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:24.363702', 'step': 15713, 'epoch': 3} {'type': 'loss', 'content': 0.08739814162254333, 'timestamp': '2025-10-01 04:32:24.366884', 'step': 15714, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:24.397685', 'step': 15714, 'epoch': 3} {'type': 'loss', 'content': 0.06117742136120796, 'timestamp': '2025-10-01 04:32:24.400254', 'step': 15715, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:24.431504', 'step': 15715, 'epoch': 3} {'type': 'loss', 'content': 0.07682529091835022, 'timestamp': '2025-10-01 04:32:24.455893', 'step': 15716, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:24.487779', 'step': 15716, 'epoch': 3} {'type': 'loss', 'content': 0.058988381177186966, 'timestamp': '2025-10-01 04:32:24.490296', 'step': 15717, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:24.521967', 'step': 15717, 'epoch': 3} {'type': 'loss', 'content': 0.023481257259845734, 'timestamp': '2025-10-01 04:32:24.524163', 'step': 15718, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:24.555515', 'step': 15718, 'epoch': 3} {'type': 'loss', 'content': 0.0571003295481205, 'timestamp': '2025-10-01 04:32:24.557752', 'step': 15719, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:24.588421', 'step': 15719, 'epoch': 3} {'type': 'loss', 'content': 0.0454200841486454, 'timestamp': '2025-10-01 04:32:24.612234', 'step': 15720, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:24.644216', 'step': 15720, 'epoch': 3} {'type': 'loss', 'content': 0.09659132361412048, 'timestamp': '2025-10-01 04:32:24.646455', 'step': 15721, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:24.678756', 'step': 15721, 'epoch': 3} {'type': 'loss', 'content': 0.06464475393295288, 'timestamp': '2025-10-01 04:32:24.680979', 'step': 15722, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:24.711780', 'step': 15722, 'epoch': 3} {'type': 'loss', 'content': 0.14131145179271698, 'timestamp': '2025-10-01 04:32:24.714124', 'step': 15723, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:24.745221', 'step': 15723, 'epoch': 3} {'type': 'loss', 'content': 0.03479066491127014, 'timestamp': '2025-10-01 04:32:24.769026', 'step': 15724, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:24.802581', 'step': 15724, 'epoch': 3} {'type': 'loss', 'content': 0.020606040954589844, 'timestamp': '2025-10-01 04:32:24.804982', 'step': 15725, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:24.840035', 'step': 15725, 'epoch': 3} {'type': 'loss', 'content': 0.07959675788879395, 'timestamp': '2025-10-01 04:32:24.842701', 'step': 15726, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:32:24.874559', 'step': 15726, 'epoch': 3} {'type': 'loss', 'content': 0.06759168207645416, 'timestamp': '2025-10-01 04:32:24.876773', 'step': 15727, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:32:24.908667', 'step': 15727, 'epoch': 3} {'type': 'loss', 'content': 0.09726675599813461, 'timestamp': '2025-10-01 04:32:24.932534', 'step': 15728, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:24.971800', 'step': 15728, 'epoch': 3} {'type': 'loss', 'content': 0.08828824013471603, 'timestamp': '2025-10-01 04:32:24.974096', 'step': 15729, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:25.005594', 'step': 15729, 'epoch': 3} {'type': 'loss', 'content': 0.07970549166202545, 'timestamp': '2025-10-01 04:32:25.008628', 'step': 15730, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:25.040028', 'step': 15730, 'epoch': 3} {'type': 'loss', 'content': 0.046053335070610046, 'timestamp': '2025-10-01 04:32:25.042847', 'step': 15731, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:25.076438', 'step': 15731, 'epoch': 3} {'type': 'loss', 'content': 0.15180903673171997, 'timestamp': '2025-10-01 04:32:25.100526', 'step': 15732, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:25.137213', 'step': 15732, 'epoch': 3} {'type': 'loss', 'content': 0.02812105417251587, 'timestamp': '2025-10-01 04:32:25.139958', 'step': 15733, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:25.174731', 'step': 15733, 'epoch': 3} {'type': 'loss', 'content': 0.049819495528936386, 'timestamp': '2025-10-01 04:32:25.177144', 'step': 15734, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:32:25.210568', 'step': 15734, 'epoch': 3} {'type': 'loss', 'content': 0.07144878059625626, 'timestamp': '2025-10-01 04:32:25.213954', 'step': 15735, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:25.247776', 'step': 15735, 'epoch': 3} {'type': 'loss', 'content': 0.03466711565852165, 'timestamp': '2025-10-01 04:32:25.271867', 'step': 15736, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:25.302931', 'step': 15736, 'epoch': 3} {'type': 'loss', 'content': 0.12525860965251923, 'timestamp': '2025-10-01 04:32:25.305303', 'step': 15737, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:25.336564', 'step': 15737, 'epoch': 3} {'type': 'loss', 'content': 0.08935008943080902, 'timestamp': '2025-10-01 04:32:25.339736', 'step': 15738, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:25.371656', 'step': 15738, 'epoch': 3} {'type': 'loss', 'content': 0.11185172200202942, 'timestamp': '2025-10-01 04:32:25.375255', 'step': 15739, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:25.406656', 'step': 15739, 'epoch': 3} {'type': 'loss', 'content': 0.09166835248470306, 'timestamp': '2025-10-01 04:32:25.430960', 'step': 15740, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:32:25.462783', 'step': 15740, 'epoch': 3} {'type': 'loss', 'content': 0.07813737541437149, 'timestamp': '2025-10-01 04:32:25.465165', 'step': 15741, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:25.501521', 'step': 15741, 'epoch': 3} {'type': 'loss', 'content': 0.04228673130273819, 'timestamp': '2025-10-01 04:32:25.503751', 'step': 15742, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:25.535788', 'step': 15742, 'epoch': 3} {'type': 'loss', 'content': 0.06325050443410873, 'timestamp': '2025-10-01 04:32:25.538916', 'step': 15743, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:25.570473', 'step': 15743, 'epoch': 3} {'type': 'loss', 'content': 0.05507367476820946, 'timestamp': '2025-10-01 04:32:25.594587', 'step': 15744, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:32:25.624953', 'step': 15744, 'epoch': 3} {'type': 'loss', 'content': 0.11056230217218399, 'timestamp': '2025-10-01 04:32:25.627529', 'step': 15745, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:25.661109', 'step': 15745, 'epoch': 3} {'type': 'loss', 'content': 0.05030577629804611, 'timestamp': '2025-10-01 04:32:25.663302', 'step': 15746, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:25.694180', 'step': 15746, 'epoch': 3} {'type': 'loss', 'content': 0.0964801087975502, 'timestamp': '2025-10-01 04:32:25.696732', 'step': 15747, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:25.727518', 'step': 15747, 'epoch': 3} {'type': 'loss', 'content': 0.08920222520828247, 'timestamp': '2025-10-01 04:32:25.751158', 'step': 15748, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:32:25.783004', 'step': 15748, 'epoch': 3} {'type': 'loss', 'content': 0.08166180551052094, 'timestamp': '2025-10-01 04:32:25.786633', 'step': 15749, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:25.817459', 'step': 15749, 'epoch': 3} {'type': 'loss', 'content': 0.05408595874905586, 'timestamp': '2025-10-01 04:32:25.820290', 'step': 15750, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:25.850817', 'step': 15750, 'epoch': 3} {'type': 'loss', 'content': 0.08288814127445221, 'timestamp': '2025-10-01 04:32:25.853146', 'step': 15751, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:25.883523', 'step': 15751, 'epoch': 3} {'type': 'loss', 'content': 0.09708983451128006, 'timestamp': '2025-10-01 04:32:25.907780', 'step': 15752, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:25.938605', 'step': 15752, 'epoch': 3} {'type': 'loss', 'content': 0.03675507381558418, 'timestamp': '2025-10-01 04:32:25.940815', 'step': 15753, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:25.971200', 'step': 15753, 'epoch': 3} {'type': 'loss', 'content': 0.05601925775408745, 'timestamp': '2025-10-01 04:32:25.973660', 'step': 15754, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:32:26.008142', 'step': 15754, 'epoch': 3} {'type': 'loss', 'content': 0.07660728693008423, 'timestamp': '2025-10-01 04:32:26.010635', 'step': 15755, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:32:26.044278', 'step': 15755, 'epoch': 3} {'type': 'loss', 'content': 0.024627413600683212, 'timestamp': '2025-10-01 04:32:26.067879', 'step': 15756, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:26.098214', 'step': 15756, 'epoch': 3} {'type': 'loss', 'content': 0.1315149962902069, 'timestamp': '2025-10-01 04:32:26.100268', 'step': 15757, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:32:26.131605', 'step': 15757, 'epoch': 3} {'type': 'loss', 'content': 0.1116376593708992, 'timestamp': '2025-10-01 04:32:26.134435', 'step': 15758, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:26.164531', 'step': 15758, 'epoch': 3} {'type': 'loss', 'content': 0.15984231233596802, 'timestamp': '2025-10-01 04:32:26.166752', 'step': 15759, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:26.197803', 'step': 15759, 'epoch': 3} {'type': 'loss', 'content': 0.09421353042125702, 'timestamp': '2025-10-01 04:32:26.222004', 'step': 15760, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:26.252868', 'step': 15760, 'epoch': 3} {'type': 'loss', 'content': 0.1460925042629242, 'timestamp': '2025-10-01 04:32:26.255095', 'step': 15761, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:26.286055', 'step': 15761, 'epoch': 3} {'type': 'loss', 'content': 0.02147306501865387, 'timestamp': '2025-10-01 04:32:26.288531', 'step': 15762, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:26.318616', 'step': 15762, 'epoch': 3} {'type': 'loss', 'content': 0.12918785214424133, 'timestamp': '2025-10-01 04:32:26.321608', 'step': 15763, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:26.351570', 'step': 15763, 'epoch': 3} {'type': 'loss', 'content': 0.08648037165403366, 'timestamp': '2025-10-01 04:32:26.375333', 'step': 15764, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:26.406709', 'step': 15764, 'epoch': 3} {'type': 'loss', 'content': 0.07700058817863464, 'timestamp': '2025-10-01 04:32:26.409541', 'step': 15765, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:26.439856', 'step': 15765, 'epoch': 3} {'type': 'loss', 'content': 0.10630582273006439, 'timestamp': '2025-10-01 04:32:26.441868', 'step': 15766, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:26.471680', 'step': 15766, 'epoch': 3} {'type': 'loss', 'content': 0.0379333421587944, 'timestamp': '2025-10-01 04:32:26.474048', 'step': 15767, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:32:26.507883', 'step': 15767, 'epoch': 3} {'type': 'loss', 'content': 0.08874490857124329, 'timestamp': '2025-10-01 04:32:26.531974', 'step': 15768, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:26.569201', 'step': 15768, 'epoch': 3} {'type': 'loss', 'content': 0.09508208930492401, 'timestamp': '2025-10-01 04:32:26.571801', 'step': 15769, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:26.603560', 'step': 15769, 'epoch': 3} {'type': 'loss', 'content': 0.07379493117332458, 'timestamp': '2025-10-01 04:32:26.605795', 'step': 15770, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:26.637001', 'step': 15770, 'epoch': 3} {'type': 'loss', 'content': 0.09806619584560394, 'timestamp': '2025-10-01 04:32:26.639307', 'step': 15771, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:26.670401', 'step': 15771, 'epoch': 3} {'type': 'loss', 'content': 0.04462001472711563, 'timestamp': '2025-10-01 04:32:26.695372', 'step': 15772, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:26.725764', 'step': 15772, 'epoch': 3} {'type': 'loss', 'content': 0.006641290616244078, 'timestamp': '2025-10-01 04:32:26.727967', 'step': 15773, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:32:26.758498', 'step': 15773, 'epoch': 3} {'type': 'loss', 'content': 0.09834441542625427, 'timestamp': '2025-10-01 04:32:26.761049', 'step': 15774, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:26.791497', 'step': 15774, 'epoch': 3} {'type': 'loss', 'content': 0.03154241666197777, 'timestamp': '2025-10-01 04:32:26.793837', 'step': 15775, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:26.825250', 'step': 15775, 'epoch': 3} {'type': 'loss', 'content': 0.10460273921489716, 'timestamp': '2025-10-01 04:32:26.848985', 'step': 15776, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:26.879573', 'step': 15776, 'epoch': 3} {'type': 'loss', 'content': 0.09286689013242722, 'timestamp': '2025-10-01 04:32:26.881657', 'step': 15777, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:26.912247', 'step': 15777, 'epoch': 3} {'type': 'loss', 'content': 0.16736599802970886, 'timestamp': '2025-10-01 04:32:26.914423', 'step': 15778, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:26.945322', 'step': 15778, 'epoch': 3} {'type': 'loss', 'content': 0.024957304820418358, 'timestamp': '2025-10-01 04:32:26.947481', 'step': 15779, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:32:26.978044', 'step': 15779, 'epoch': 3} {'type': 'loss', 'content': 0.05009564384818077, 'timestamp': '2025-10-01 04:32:27.001654', 'step': 15780, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:32:27.033354', 'step': 15780, 'epoch': 3} {'type': 'loss', 'content': 0.05043920502066612, 'timestamp': '2025-10-01 04:32:27.035777', 'step': 15781, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:27.065795', 'step': 15781, 'epoch': 3} {'type': 'loss', 'content': 0.052693650126457214, 'timestamp': '2025-10-01 04:32:27.068121', 'step': 15782, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:32:27.098744', 'step': 15782, 'epoch': 3} {'type': 'loss', 'content': 0.1768869012594223, 'timestamp': '2025-10-01 04:32:27.101019', 'step': 15783, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:32:27.131588', 'step': 15783, 'epoch': 3} {'type': 'loss', 'content': 0.048736389726400375, 'timestamp': '2025-10-01 04:32:27.155529', 'step': 15784, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:27.185675', 'step': 15784, 'epoch': 3} {'type': 'loss', 'content': 0.10463812947273254, 'timestamp': '2025-10-01 04:32:27.187889', 'step': 15785, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:27.218090', 'step': 15785, 'epoch': 3} {'type': 'loss', 'content': 0.09042564034461975, 'timestamp': '2025-10-01 04:32:27.220428', 'step': 15786, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:27.251450', 'step': 15786, 'epoch': 3} {'type': 'loss', 'content': 0.03983885049819946, 'timestamp': '2025-10-01 04:32:27.253738', 'step': 15787, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:27.283868', 'step': 15787, 'epoch': 3} {'type': 'loss', 'content': 0.16487006843090057, 'timestamp': '2025-10-01 04:32:27.307531', 'step': 15788, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:27.338053', 'step': 15788, 'epoch': 3} {'type': 'loss', 'content': 0.11648401618003845, 'timestamp': '2025-10-01 04:32:27.340299', 'step': 15789, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:27.370697', 'step': 15789, 'epoch': 3} {'type': 'loss', 'content': 0.10824102908372879, 'timestamp': '2025-10-01 04:32:27.372706', 'step': 15790, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:27.403389', 'step': 15790, 'epoch': 3} {'type': 'loss', 'content': 0.033852532505989075, 'timestamp': '2025-10-01 04:32:27.405470', 'step': 15791, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:32:27.436647', 'step': 15791, 'epoch': 3} {'type': 'loss', 'content': 0.10728204995393753, 'timestamp': '2025-10-01 04:32:27.460218', 'step': 15792, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:32:27.490818', 'step': 15792, 'epoch': 3} {'type': 'loss', 'content': 0.05621203035116196, 'timestamp': '2025-10-01 04:32:27.493047', 'step': 15793, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:27.524079', 'step': 15793, 'epoch': 3} {'type': 'loss', 'content': 0.10283605009317398, 'timestamp': '2025-10-01 04:32:27.526252', 'step': 15794, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:27.555888', 'step': 15794, 'epoch': 3} {'type': 'loss', 'content': 0.06949854642152786, 'timestamp': '2025-10-01 04:32:27.558085', 'step': 15795, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:27.588052', 'step': 15795, 'epoch': 3} {'type': 'loss', 'content': 0.06476462632417679, 'timestamp': '2025-10-01 04:32:27.611843', 'step': 15796, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:27.642719', 'step': 15796, 'epoch': 3} {'type': 'loss', 'content': 0.09424924850463867, 'timestamp': '2025-10-01 04:32:27.645127', 'step': 15797, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:27.675043', 'step': 15797, 'epoch': 3} {'type': 'loss', 'content': 0.10144946724176407, 'timestamp': '2025-10-01 04:32:27.678130', 'step': 15798, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:27.708621', 'step': 15798, 'epoch': 3} {'type': 'loss', 'content': 0.08313208818435669, 'timestamp': '2025-10-01 04:32:27.710731', 'step': 15799, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:27.741016', 'step': 15799, 'epoch': 3} {'type': 'loss', 'content': 0.050807490944862366, 'timestamp': '2025-10-01 04:32:27.764545', 'step': 15800, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:27.795188', 'step': 15800, 'epoch': 3} {'type': 'loss', 'content': 0.03563500568270683, 'timestamp': '2025-10-01 04:32:27.797271', 'step': 15801, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:27.832389', 'step': 15801, 'epoch': 3} {'type': 'loss', 'content': 0.11100015789270401, 'timestamp': '2025-10-01 04:32:27.837798', 'step': 15802, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:27.881042', 'step': 15802, 'epoch': 3} {'type': 'loss', 'content': 0.06696620583534241, 'timestamp': '2025-10-01 04:32:27.883563', 'step': 15803, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:27.913634', 'step': 15803, 'epoch': 3} {'type': 'loss', 'content': 0.10969085246324539, 'timestamp': '2025-10-01 04:32:27.937095', 'step': 15804, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:27.979841', 'step': 15804, 'epoch': 3} {'type': 'loss', 'content': 0.0775044783949852, 'timestamp': '2025-10-01 04:32:27.984149', 'step': 15805, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:28.016861', 'step': 15805, 'epoch': 3} {'type': 'loss', 'content': 0.17505228519439697, 'timestamp': '2025-10-01 04:32:28.019720', 'step': 15806, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:32:28.051197', 'step': 15806, 'epoch': 3} {'type': 'loss', 'content': 0.11025334149599075, 'timestamp': '2025-10-01 04:32:28.054896', 'step': 15807, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:28.086301', 'step': 15807, 'epoch': 3} {'type': 'loss', 'content': 0.154905766248703, 'timestamp': '2025-10-01 04:32:28.110111', 'step': 15808, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:28.140825', 'step': 15808, 'epoch': 3} {'type': 'loss', 'content': 0.16085684299468994, 'timestamp': '2025-10-01 04:32:28.142977', 'step': 15809, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:32:28.174716', 'step': 15809, 'epoch': 3} {'type': 'loss', 'content': 0.028764404356479645, 'timestamp': '2025-10-01 04:32:28.178190', 'step': 15810, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:28.209826', 'step': 15810, 'epoch': 3} {'type': 'loss', 'content': 0.061232198029756546, 'timestamp': '2025-10-01 04:32:28.212327', 'step': 15811, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:28.243770', 'step': 15811, 'epoch': 3} {'type': 'loss', 'content': 0.07093778997659683, 'timestamp': '2025-10-01 04:32:28.267631', 'step': 15812, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:28.298097', 'step': 15812, 'epoch': 3} {'type': 'loss', 'content': 0.1069202572107315, 'timestamp': '2025-10-01 04:32:28.300651', 'step': 15813, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:28.332716', 'step': 15813, 'epoch': 3} {'type': 'loss', 'content': 0.1016315147280693, 'timestamp': '2025-10-01 04:32:28.335054', 'step': 15814, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:28.365763', 'step': 15814, 'epoch': 3} {'type': 'loss', 'content': 0.053463131189346313, 'timestamp': '2025-10-01 04:32:28.370306', 'step': 15815, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:28.401921', 'step': 15815, 'epoch': 3} {'type': 'loss', 'content': 0.07265005260705948, 'timestamp': '2025-10-01 04:32:28.426076', 'step': 15816, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:32:28.459219', 'step': 15816, 'epoch': 3} {'type': 'loss', 'content': 0.06958406418561935, 'timestamp': '2025-10-01 04:32:28.461471', 'step': 15817, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:28.492396', 'step': 15817, 'epoch': 3} {'type': 'loss', 'content': 0.07044468075037003, 'timestamp': '2025-10-01 04:32:28.494566', 'step': 15818, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:28.525683', 'step': 15818, 'epoch': 3} {'type': 'loss', 'content': 0.11448930203914642, 'timestamp': '2025-10-01 04:32:28.528246', 'step': 15819, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:28.559559', 'step': 15819, 'epoch': 3} {'type': 'loss', 'content': 0.1431427150964737, 'timestamp': '2025-10-01 04:32:28.583299', 'step': 15820, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:28.613465', 'step': 15820, 'epoch': 3} {'type': 'loss', 'content': 0.05851466581225395, 'timestamp': '2025-10-01 04:32:28.615773', 'step': 15821, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:28.646627', 'step': 15821, 'epoch': 3} {'type': 'loss', 'content': 0.06051921099424362, 'timestamp': '2025-10-01 04:32:28.648836', 'step': 15822, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:32:28.680343', 'step': 15822, 'epoch': 3} {'type': 'loss', 'content': 0.04297156259417534, 'timestamp': '2025-10-01 04:32:28.682750', 'step': 15823, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:28.714209', 'step': 15823, 'epoch': 3} {'type': 'loss', 'content': 0.07715912163257599, 'timestamp': '2025-10-01 04:32:28.738047', 'step': 15824, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:32:28.770030', 'step': 15824, 'epoch': 3} {'type': 'loss', 'content': 0.10213643312454224, 'timestamp': '2025-10-01 04:32:28.772527', 'step': 15825, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:32:28.803451', 'step': 15825, 'epoch': 3} {'type': 'loss', 'content': 0.08950790762901306, 'timestamp': '2025-10-01 04:32:28.805903', 'step': 15826, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:28.837699', 'step': 15826, 'epoch': 3} {'type': 'loss', 'content': 0.08277862519025803, 'timestamp': '2025-10-01 04:32:28.840037', 'step': 15827, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:28.870791', 'step': 15827, 'epoch': 3} {'type': 'loss', 'content': 0.06254077702760696, 'timestamp': '2025-10-01 04:32:28.894224', 'step': 15828, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:32:28.925608', 'step': 15828, 'epoch': 3} {'type': 'loss', 'content': 0.1166853979229927, 'timestamp': '2025-10-01 04:32:28.928248', 'step': 15829, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:28.958961', 'step': 15829, 'epoch': 3} {'type': 'loss', 'content': 0.11993557214736938, 'timestamp': '2025-10-01 04:32:28.961208', 'step': 15830, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:28.992865', 'step': 15830, 'epoch': 3} {'type': 'loss', 'content': 0.1508854329586029, 'timestamp': '2025-10-01 04:32:28.995099', 'step': 15831, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:29.025786', 'step': 15831, 'epoch': 3} {'type': 'loss', 'content': 0.06511349231004715, 'timestamp': '2025-10-01 04:32:29.049909', 'step': 15832, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:29.081573', 'step': 15832, 'epoch': 3} {'type': 'loss', 'content': 0.10410834103822708, 'timestamp': '2025-10-01 04:32:29.084159', 'step': 15833, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:29.114841', 'step': 15833, 'epoch': 3} {'type': 'loss', 'content': 0.05856231227517128, 'timestamp': '2025-10-01 04:32:29.117426', 'step': 15834, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-10-01 04:32:29.148540', 'step': 15834, 'epoch': 3} {'type': 'loss', 'content': 0.20885716378688812, 'timestamp': '2025-10-01 04:32:29.156095', 'step': 15835, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:29.188587', 'step': 15835, 'epoch': 3} {'type': 'loss', 'content': 0.08370279520750046, 'timestamp': '2025-10-01 04:32:29.212266', 'step': 15836, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:29.242461', 'step': 15836, 'epoch': 3} {'type': 'loss', 'content': 0.09691449254751205, 'timestamp': '2025-10-01 04:32:29.245283', 'step': 15837, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:29.275712', 'step': 15837, 'epoch': 3} {'type': 'loss', 'content': 0.07790198177099228, 'timestamp': '2025-10-01 04:32:29.278049', 'step': 15838, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:29.308613', 'step': 15838, 'epoch': 3} {'type': 'loss', 'content': 0.12186935544013977, 'timestamp': '2025-10-01 04:32:29.310753', 'step': 15839, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:29.340281', 'step': 15839, 'epoch': 3} {'type': 'loss', 'content': 0.07114670425653458, 'timestamp': '2025-10-01 04:32:29.363970', 'step': 15840, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:29.395256', 'step': 15840, 'epoch': 3} {'type': 'loss', 'content': 0.1131846085190773, 'timestamp': '2025-10-01 04:32:29.397523', 'step': 15841, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:29.429391', 'step': 15841, 'epoch': 3} {'type': 'loss', 'content': 0.0687936395406723, 'timestamp': '2025-10-01 04:32:29.431717', 'step': 15842, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:29.462112', 'step': 15842, 'epoch': 3} {'type': 'loss', 'content': 0.09725502878427505, 'timestamp': '2025-10-01 04:32:29.464329', 'step': 15843, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:29.494510', 'step': 15843, 'epoch': 3} {'type': 'loss', 'content': 0.15255002677440643, 'timestamp': '2025-10-01 04:32:29.517951', 'step': 15844, 'epoch': 3} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 949202279808}], 'timestamp': '2025-10-01 04:32:37.673817', 'step': 15844, 'epoch': 3} {'type': 'pplx', 'content': 11491.629934142848, 'timestamp': '2025-10-01 04:32:37.676747', 'step': 15844, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:32:37.707789', 'step': 15844, 'epoch': 3} {'type': 'loss', 'content': 0.07038851827383041, 'timestamp': '2025-10-01 04:32:37.709975', 'step': 15845, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:37.742743', 'step': 15845, 'epoch': 3} {'type': 'loss', 'content': 0.1678629368543625, 'timestamp': '2025-10-01 04:32:37.744892', 'step': 15846, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:37.787894', 'step': 15846, 'epoch': 3} {'type': 'loss', 'content': 0.14935509860515594, 'timestamp': '2025-10-01 04:32:37.790048', 'step': 15847, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:37.821021', 'step': 15847, 'epoch': 3} {'type': 'loss', 'content': 0.06272601336240768, 'timestamp': '2025-10-01 04:32:37.844859', 'step': 15848, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:32:37.875698', 'step': 15848, 'epoch': 3} {'type': 'loss', 'content': 0.09808160364627838, 'timestamp': '2025-10-01 04:32:37.878013', 'step': 15849, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:37.908577', 'step': 15849, 'epoch': 3} {'type': 'loss', 'content': 0.06184433773159981, 'timestamp': '2025-10-01 04:32:37.911059', 'step': 15850, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:37.942005', 'step': 15850, 'epoch': 3} {'type': 'loss', 'content': 0.0801786407828331, 'timestamp': '2025-10-01 04:32:37.944189', 'step': 15851, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:37.974689', 'step': 15851, 'epoch': 3} {'type': 'loss', 'content': 0.09296998381614685, 'timestamp': '2025-10-01 04:32:37.998388', 'step': 15852, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:38.028834', 'step': 15852, 'epoch': 3} {'type': 'loss', 'content': 0.04869774729013443, 'timestamp': '2025-10-01 04:32:38.038684', 'step': 15853, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:38.069014', 'step': 15853, 'epoch': 3} {'type': 'loss', 'content': 0.036968231201171875, 'timestamp': '2025-10-01 04:32:38.071195', 'step': 15854, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:38.102780', 'step': 15854, 'epoch': 3} {'type': 'loss', 'content': 0.08059253543615341, 'timestamp': '2025-10-01 04:32:38.104951', 'step': 15855, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:38.136507', 'step': 15855, 'epoch': 3} {'type': 'loss', 'content': 0.04854842647910118, 'timestamp': '2025-10-01 04:32:38.160333', 'step': 15856, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:38.190899', 'step': 15856, 'epoch': 3} {'type': 'loss', 'content': 0.054670412093400955, 'timestamp': '2025-10-01 04:32:38.193112', 'step': 15857, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:38.223121', 'step': 15857, 'epoch': 3} {'type': 'loss', 'content': 0.0862969160079956, 'timestamp': '2025-10-01 04:32:38.225351', 'step': 15858, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:32:38.257172', 'step': 15858, 'epoch': 3} {'type': 'loss', 'content': 0.07479503750801086, 'timestamp': '2025-10-01 04:32:38.261589', 'step': 15859, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:38.293812', 'step': 15859, 'epoch': 3} {'type': 'loss', 'content': 0.08386418223381042, 'timestamp': '2025-10-01 04:32:38.317591', 'step': 15860, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:38.349024', 'step': 15860, 'epoch': 3} {'type': 'loss', 'content': 0.07230333983898163, 'timestamp': '2025-10-01 04:32:38.351154', 'step': 15861, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:38.383170', 'step': 15861, 'epoch': 3} {'type': 'loss', 'content': 0.022370832040905952, 'timestamp': '2025-10-01 04:32:38.385428', 'step': 15862, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:38.416149', 'step': 15862, 'epoch': 3} {'type': 'loss', 'content': 0.11487533152103424, 'timestamp': '2025-10-01 04:32:38.418286', 'step': 15863, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:38.449153', 'step': 15863, 'epoch': 3} {'type': 'loss', 'content': 0.02061428502202034, 'timestamp': '2025-10-01 04:32:38.473299', 'step': 15864, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:38.522432', 'step': 15864, 'epoch': 3} {'type': 'loss', 'content': 0.06920860707759857, 'timestamp': '2025-10-01 04:32:38.525053', 'step': 15865, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:38.555935', 'step': 15865, 'epoch': 3} {'type': 'loss', 'content': 0.05064329132437706, 'timestamp': '2025-10-01 04:32:38.558127', 'step': 15866, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:38.588372', 'step': 15866, 'epoch': 3} {'type': 'loss', 'content': 0.1131296455860138, 'timestamp': '2025-10-01 04:32:38.590776', 'step': 15867, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:38.621573', 'step': 15867, 'epoch': 3} {'type': 'loss', 'content': 0.1049768254160881, 'timestamp': '2025-10-01 04:32:38.645957', 'step': 15868, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:32:38.676365', 'step': 15868, 'epoch': 3} {'type': 'loss', 'content': 0.09306260943412781, 'timestamp': '2025-10-01 04:32:38.678574', 'step': 15869, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:38.708714', 'step': 15869, 'epoch': 3} {'type': 'loss', 'content': 0.08114293217658997, 'timestamp': '2025-10-01 04:32:38.711079', 'step': 15870, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:38.742111', 'step': 15870, 'epoch': 3} {'type': 'loss', 'content': 0.059714287519454956, 'timestamp': '2025-10-01 04:32:38.744439', 'step': 15871, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:38.774537', 'step': 15871, 'epoch': 3} {'type': 'loss', 'content': 0.06623005867004395, 'timestamp': '2025-10-01 04:32:38.798339', 'step': 15872, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:38.829168', 'step': 15872, 'epoch': 3} {'type': 'loss', 'content': 0.1533951312303543, 'timestamp': '2025-10-01 04:32:38.831431', 'step': 15873, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:32:38.861810', 'step': 15873, 'epoch': 3} {'type': 'loss', 'content': 0.02362513355910778, 'timestamp': '2025-10-01 04:32:38.864307', 'step': 15874, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:38.896395', 'step': 15874, 'epoch': 3} {'type': 'loss', 'content': 0.09373997151851654, 'timestamp': '2025-10-01 04:32:38.898630', 'step': 15875, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:38.930074', 'step': 15875, 'epoch': 3} {'type': 'loss', 'content': 0.05681776627898216, 'timestamp': '2025-10-01 04:32:38.953862', 'step': 15876, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:38.984708', 'step': 15876, 'epoch': 3} {'type': 'loss', 'content': 0.08177369087934494, 'timestamp': '2025-10-01 04:32:38.986908', 'step': 15877, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:39.018796', 'step': 15877, 'epoch': 3} {'type': 'loss', 'content': 0.10286835581064224, 'timestamp': '2025-10-01 04:32:39.021170', 'step': 15878, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:39.053669', 'step': 15878, 'epoch': 3} {'type': 'loss', 'content': 0.1640581488609314, 'timestamp': '2025-10-01 04:32:39.056051', 'step': 15879, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:39.087183', 'step': 15879, 'epoch': 3} {'type': 'loss', 'content': 0.07082944363355637, 'timestamp': '2025-10-01 04:32:39.110920', 'step': 15880, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:39.141667', 'step': 15880, 'epoch': 3} {'type': 'loss', 'content': 0.10411813855171204, 'timestamp': '2025-10-01 04:32:39.143888', 'step': 15881, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:39.174051', 'step': 15881, 'epoch': 3} {'type': 'loss', 'content': 0.06904751807451248, 'timestamp': '2025-10-01 04:32:39.176262', 'step': 15882, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:39.207820', 'step': 15882, 'epoch': 3} {'type': 'loss', 'content': 0.1116606742143631, 'timestamp': '2025-10-01 04:32:39.210080', 'step': 15883, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:39.241595', 'step': 15883, 'epoch': 3} {'type': 'loss', 'content': 0.09225394576787949, 'timestamp': '2025-10-01 04:32:39.265382', 'step': 15884, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:32:39.310128', 'step': 15884, 'epoch': 3} {'type': 'loss', 'content': 0.07720739394426346, 'timestamp': '2025-10-01 04:32:39.312148', 'step': 15885, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:39.342414', 'step': 15885, 'epoch': 3} {'type': 'loss', 'content': 0.030727606266736984, 'timestamp': '2025-10-01 04:32:39.344570', 'step': 15886, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:32:39.379576', 'step': 15886, 'epoch': 3} {'type': 'loss', 'content': 0.06671693921089172, 'timestamp': '2025-10-01 04:32:39.382389', 'step': 15887, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:39.414258', 'step': 15887, 'epoch': 3} {'type': 'loss', 'content': 0.11024574935436249, 'timestamp': '2025-10-01 04:32:39.438532', 'step': 15888, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:39.469163', 'step': 15888, 'epoch': 3} {'type': 'loss', 'content': 0.060743000358343124, 'timestamp': '2025-10-01 04:32:39.471262', 'step': 15889, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:39.501745', 'step': 15889, 'epoch': 3} {'type': 'loss', 'content': 0.11669236421585083, 'timestamp': '2025-10-01 04:32:39.504040', 'step': 15890, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:39.535376', 'step': 15890, 'epoch': 3} {'type': 'loss', 'content': 0.08496372401714325, 'timestamp': '2025-10-01 04:32:39.537611', 'step': 15891, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:39.568084', 'step': 15891, 'epoch': 3} {'type': 'loss', 'content': 0.15090399980545044, 'timestamp': '2025-10-01 04:32:39.592078', 'step': 15892, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:39.624759', 'step': 15892, 'epoch': 3} {'type': 'loss', 'content': 0.09447842091321945, 'timestamp': '2025-10-01 04:32:39.627144', 'step': 15893, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:39.658250', 'step': 15893, 'epoch': 3} {'type': 'loss', 'content': 0.0566045343875885, 'timestamp': '2025-10-01 04:32:39.660651', 'step': 15894, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:39.696810', 'step': 15894, 'epoch': 3} {'type': 'loss', 'content': 0.09912171959877014, 'timestamp': '2025-10-01 04:32:39.699113', 'step': 15895, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:32:39.743730', 'step': 15895, 'epoch': 3} {'type': 'loss', 'content': 0.07360807806253433, 'timestamp': '2025-10-01 04:32:39.767577', 'step': 15896, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:39.800523', 'step': 15896, 'epoch': 3} {'type': 'loss', 'content': 0.04729057103395462, 'timestamp': '2025-10-01 04:32:39.802645', 'step': 15897, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:39.833331', 'step': 15897, 'epoch': 3} {'type': 'loss', 'content': 0.11807146668434143, 'timestamp': '2025-10-01 04:32:39.836818', 'step': 15898, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:39.867702', 'step': 15898, 'epoch': 3} {'type': 'loss', 'content': 0.07642319798469543, 'timestamp': '2025-10-01 04:32:39.869767', 'step': 15899, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:39.902088', 'step': 15899, 'epoch': 3} {'type': 'loss', 'content': 0.051961787045001984, 'timestamp': '2025-10-01 04:32:39.925889', 'step': 15900, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:39.956596', 'step': 15900, 'epoch': 3} {'type': 'loss', 'content': 0.0911826640367508, 'timestamp': '2025-10-01 04:32:39.959305', 'step': 15901, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:39.991410', 'step': 15901, 'epoch': 3} {'type': 'loss', 'content': 0.09775031358003616, 'timestamp': '2025-10-01 04:32:39.993524', 'step': 15902, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:40.024195', 'step': 15902, 'epoch': 3} {'type': 'loss', 'content': 0.024192022159695625, 'timestamp': '2025-10-01 04:32:40.026388', 'step': 15903, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:40.057257', 'step': 15903, 'epoch': 3} {'type': 'loss', 'content': 0.06000339239835739, 'timestamp': '2025-10-01 04:32:40.081075', 'step': 15904, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:40.113377', 'step': 15904, 'epoch': 3} {'type': 'loss', 'content': 0.09625498205423355, 'timestamp': '2025-10-01 04:32:40.115642', 'step': 15905, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:40.146963', 'step': 15905, 'epoch': 3} {'type': 'loss', 'content': 0.06065171957015991, 'timestamp': '2025-10-01 04:32:40.149233', 'step': 15906, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:40.181862', 'step': 15906, 'epoch': 3} {'type': 'loss', 'content': 0.06632429361343384, 'timestamp': '2025-10-01 04:32:40.184159', 'step': 15907, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:40.219557', 'step': 15907, 'epoch': 3} {'type': 'loss', 'content': 0.07068099826574326, 'timestamp': '2025-10-01 04:32:40.243851', 'step': 15908, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:40.289215', 'step': 15908, 'epoch': 3} {'type': 'loss', 'content': 0.10815688967704773, 'timestamp': '2025-10-01 04:32:40.299634', 'step': 15909, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:40.330451', 'step': 15909, 'epoch': 3} {'type': 'loss', 'content': 0.08486257493495941, 'timestamp': '2025-10-01 04:32:40.332688', 'step': 15910, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:40.363734', 'step': 15910, 'epoch': 3} {'type': 'loss', 'content': 0.036942481994628906, 'timestamp': '2025-10-01 04:32:40.366131', 'step': 15911, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:40.398876', 'step': 15911, 'epoch': 3} {'type': 'loss', 'content': 0.09493566304445267, 'timestamp': '2025-10-01 04:32:40.423204', 'step': 15912, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:40.454809', 'step': 15912, 'epoch': 3} {'type': 'loss', 'content': 0.06326742470264435, 'timestamp': '2025-10-01 04:32:40.457128', 'step': 15913, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:32:40.490216', 'step': 15913, 'epoch': 3} {'type': 'loss', 'content': 0.07266413420438766, 'timestamp': '2025-10-01 04:32:40.492446', 'step': 15914, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:40.525370', 'step': 15914, 'epoch': 3} {'type': 'loss', 'content': 0.0420406274497509, 'timestamp': '2025-10-01 04:32:40.528241', 'step': 15915, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:32:40.565201', 'step': 15915, 'epoch': 3} {'type': 'loss', 'content': 0.07790767401456833, 'timestamp': '2025-10-01 04:32:40.589291', 'step': 15916, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:40.620090', 'step': 15916, 'epoch': 3} {'type': 'loss', 'content': 0.07930814474821091, 'timestamp': '2025-10-01 04:32:40.622308', 'step': 15917, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:40.653066', 'step': 15917, 'epoch': 3} {'type': 'loss', 'content': 0.10104081779718399, 'timestamp': '2025-10-01 04:32:40.655217', 'step': 15918, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:40.687162', 'step': 15918, 'epoch': 3} {'type': 'loss', 'content': 0.10491857677698135, 'timestamp': '2025-10-01 04:32:40.689561', 'step': 15919, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:40.720307', 'step': 15919, 'epoch': 3} {'type': 'loss', 'content': 0.0535300113260746, 'timestamp': '2025-10-01 04:32:40.744182', 'step': 15920, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:40.774567', 'step': 15920, 'epoch': 3} {'type': 'loss', 'content': 0.07151052355766296, 'timestamp': '2025-10-01 04:32:40.777070', 'step': 15921, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:40.808936', 'step': 15921, 'epoch': 3} {'type': 'loss', 'content': 0.061987489461898804, 'timestamp': '2025-10-01 04:32:40.811350', 'step': 15922, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:40.855143', 'step': 15922, 'epoch': 3} {'type': 'loss', 'content': 0.13137799501419067, 'timestamp': '2025-10-01 04:32:40.857526', 'step': 15923, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:40.889804', 'step': 15923, 'epoch': 3} {'type': 'loss', 'content': 0.05868222191929817, 'timestamp': '2025-10-01 04:32:40.913659', 'step': 15924, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:40.954462', 'step': 15924, 'epoch': 3} {'type': 'loss', 'content': 0.031181558966636658, 'timestamp': '2025-10-01 04:32:40.956791', 'step': 15925, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:40.989919', 'step': 15925, 'epoch': 3} {'type': 'loss', 'content': 0.07525955885648727, 'timestamp': '2025-10-01 04:32:40.992073', 'step': 15926, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:41.022589', 'step': 15926, 'epoch': 3} {'type': 'loss', 'content': 0.05871254950761795, 'timestamp': '2025-10-01 04:32:41.025197', 'step': 15927, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:41.057017', 'step': 15927, 'epoch': 3} {'type': 'loss', 'content': 0.06484754383563995, 'timestamp': '2025-10-01 04:32:41.081866', 'step': 15928, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:41.112708', 'step': 15928, 'epoch': 3} {'type': 'loss', 'content': 0.08418910950422287, 'timestamp': '2025-10-01 04:32:41.114972', 'step': 15929, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:41.153809', 'step': 15929, 'epoch': 3} {'type': 'loss', 'content': 0.03960034251213074, 'timestamp': '2025-10-01 04:32:41.155978', 'step': 15930, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:32:41.187730', 'step': 15930, 'epoch': 3} {'type': 'loss', 'content': 0.15501412749290466, 'timestamp': '2025-10-01 04:32:41.191928', 'step': 15931, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:41.223392', 'step': 15931, 'epoch': 3} {'type': 'loss', 'content': 0.06067190691828728, 'timestamp': '2025-10-01 04:32:41.247277', 'step': 15932, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:32:41.277825', 'step': 15932, 'epoch': 3} {'type': 'loss', 'content': 0.07889807224273682, 'timestamp': '2025-10-01 04:32:41.279983', 'step': 15933, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:41.310163', 'step': 15933, 'epoch': 3} {'type': 'loss', 'content': 0.07750429958105087, 'timestamp': '2025-10-01 04:32:41.312332', 'step': 15934, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:41.343159', 'step': 15934, 'epoch': 3} {'type': 'loss', 'content': 0.07774186134338379, 'timestamp': '2025-10-01 04:32:41.345379', 'step': 15935, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:41.376683', 'step': 15935, 'epoch': 3} {'type': 'loss', 'content': 0.10361018031835556, 'timestamp': '2025-10-01 04:32:41.400578', 'step': 15936, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:41.432045', 'step': 15936, 'epoch': 3} {'type': 'loss', 'content': 0.03912981227040291, 'timestamp': '2025-10-01 04:32:41.434442', 'step': 15937, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:41.466216', 'step': 15937, 'epoch': 3} {'type': 'loss', 'content': 0.06029859557747841, 'timestamp': '2025-10-01 04:32:41.468426', 'step': 15938, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:41.499727', 'step': 15938, 'epoch': 3} {'type': 'loss', 'content': 0.05460979416966438, 'timestamp': '2025-10-01 04:32:41.501980', 'step': 15939, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:41.532861', 'step': 15939, 'epoch': 3} {'type': 'loss', 'content': 0.12156512588262558, 'timestamp': '2025-10-01 04:32:41.556465', 'step': 15940, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:41.587796', 'step': 15940, 'epoch': 3} {'type': 'loss', 'content': 0.07399272173643112, 'timestamp': '2025-10-01 04:32:41.589975', 'step': 15941, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:41.619914', 'step': 15941, 'epoch': 3} {'type': 'loss', 'content': 0.12371501326560974, 'timestamp': '2025-10-01 04:32:41.622016', 'step': 15942, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:41.653753', 'step': 15942, 'epoch': 3} {'type': 'loss', 'content': 0.09087353944778442, 'timestamp': '2025-10-01 04:32:41.655965', 'step': 15943, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:41.686613', 'step': 15943, 'epoch': 3} {'type': 'loss', 'content': 0.03937513008713722, 'timestamp': '2025-10-01 04:32:41.710655', 'step': 15944, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:41.753552', 'step': 15944, 'epoch': 3} {'type': 'loss', 'content': 0.06429938971996307, 'timestamp': '2025-10-01 04:32:41.755793', 'step': 15945, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:41.786354', 'step': 15945, 'epoch': 3} {'type': 'loss', 'content': 0.07266717404127121, 'timestamp': '2025-10-01 04:32:41.788519', 'step': 15946, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:41.818622', 'step': 15946, 'epoch': 3} {'type': 'loss', 'content': 0.05012452229857445, 'timestamp': '2025-10-01 04:32:41.820810', 'step': 15947, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:41.850868', 'step': 15947, 'epoch': 3} {'type': 'loss', 'content': 0.09818952530622482, 'timestamp': '2025-10-01 04:32:41.874602', 'step': 15948, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:41.905462', 'step': 15948, 'epoch': 3} {'type': 'loss', 'content': 0.10376264154911041, 'timestamp': '2025-10-01 04:32:41.908102', 'step': 15949, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:32:41.940184', 'step': 15949, 'epoch': 3} {'type': 'loss', 'content': 0.03539781644940376, 'timestamp': '2025-10-01 04:32:41.943159', 'step': 15950, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:41.974138', 'step': 15950, 'epoch': 3} {'type': 'loss', 'content': 0.1407923698425293, 'timestamp': '2025-10-01 04:32:41.976500', 'step': 15951, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:42.008206', 'step': 15951, 'epoch': 3} {'type': 'loss', 'content': 0.05361514165997505, 'timestamp': '2025-10-01 04:32:42.031889', 'step': 15952, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:42.062388', 'step': 15952, 'epoch': 3} {'type': 'loss', 'content': 0.1956353634595871, 'timestamp': '2025-10-01 04:32:42.064511', 'step': 15953, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:42.099024', 'step': 15953, 'epoch': 3} {'type': 'loss', 'content': 0.06581480801105499, 'timestamp': '2025-10-01 04:32:42.101135', 'step': 15954, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:42.132230', 'step': 15954, 'epoch': 3} {'type': 'loss', 'content': 0.11059843003749847, 'timestamp': '2025-10-01 04:32:42.134237', 'step': 15955, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:42.165975', 'step': 15955, 'epoch': 3} {'type': 'loss', 'content': 0.04640967771410942, 'timestamp': '2025-10-01 04:32:42.189602', 'step': 15956, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:42.220080', 'step': 15956, 'epoch': 3} {'type': 'loss', 'content': 0.06462603062391281, 'timestamp': '2025-10-01 04:32:42.222163', 'step': 15957, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:42.254268', 'step': 15957, 'epoch': 3} {'type': 'loss', 'content': 0.055980946868658066, 'timestamp': '2025-10-01 04:32:42.256262', 'step': 15958, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:42.287340', 'step': 15958, 'epoch': 3} {'type': 'loss', 'content': 0.07710902392864227, 'timestamp': '2025-10-01 04:32:42.301445', 'step': 15959, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:32:42.339632', 'step': 15959, 'epoch': 3} {'type': 'loss', 'content': 0.05171605199575424, 'timestamp': '2025-10-01 04:32:42.365069', 'step': 15960, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:42.396451', 'step': 15960, 'epoch': 3} {'type': 'loss', 'content': 0.10990121960639954, 'timestamp': '2025-10-01 04:32:42.398646', 'step': 15961, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:42.429632', 'step': 15961, 'epoch': 3} {'type': 'loss', 'content': 0.09199517965316772, 'timestamp': '2025-10-01 04:32:42.431682', 'step': 15962, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:32:42.462951', 'step': 15962, 'epoch': 3} {'type': 'loss', 'content': 0.041455648839473724, 'timestamp': '2025-10-01 04:32:42.465402', 'step': 15963, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:42.496557', 'step': 15963, 'epoch': 3} {'type': 'loss', 'content': 0.10764119029045105, 'timestamp': '2025-10-01 04:32:42.521020', 'step': 15964, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:42.553303', 'step': 15964, 'epoch': 3} {'type': 'loss', 'content': 0.10334683954715729, 'timestamp': '2025-10-01 04:32:42.555539', 'step': 15965, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:42.589506', 'step': 15965, 'epoch': 3} {'type': 'loss', 'content': 0.016367055475711823, 'timestamp': '2025-10-01 04:32:42.591843', 'step': 15966, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:42.622335', 'step': 15966, 'epoch': 3} {'type': 'loss', 'content': 0.11049403995275497, 'timestamp': '2025-10-01 04:32:42.638693', 'step': 15967, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:42.670357', 'step': 15967, 'epoch': 3} {'type': 'loss', 'content': 0.06721793115139008, 'timestamp': '2025-10-01 04:32:42.693935', 'step': 15968, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:42.726692', 'step': 15968, 'epoch': 3} {'type': 'loss', 'content': 0.02780795469880104, 'timestamp': '2025-10-01 04:32:42.735440', 'step': 15969, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:42.766581', 'step': 15969, 'epoch': 3} {'type': 'loss', 'content': 0.07069242000579834, 'timestamp': '2025-10-01 04:32:42.768662', 'step': 15970, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:42.799236', 'step': 15970, 'epoch': 3} {'type': 'loss', 'content': 0.06908716261386871, 'timestamp': '2025-10-01 04:32:42.801327', 'step': 15971, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:42.832638', 'step': 15971, 'epoch': 3} {'type': 'loss', 'content': 0.05465395376086235, 'timestamp': '2025-10-01 04:32:42.856168', 'step': 15972, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:42.886874', 'step': 15972, 'epoch': 3} {'type': 'loss', 'content': 0.0677633062005043, 'timestamp': '2025-10-01 04:32:42.889290', 'step': 15973, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:42.931188', 'step': 15973, 'epoch': 3} {'type': 'loss', 'content': 0.06286487728357315, 'timestamp': '2025-10-01 04:32:42.933932', 'step': 15974, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:42.964804', 'step': 15974, 'epoch': 3} {'type': 'loss', 'content': 0.07476512342691422, 'timestamp': '2025-10-01 04:32:42.967037', 'step': 15975, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:32:42.997394', 'step': 15975, 'epoch': 3} {'type': 'loss', 'content': 0.13033640384674072, 'timestamp': '2025-10-01 04:32:43.022705', 'step': 15976, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:43.053218', 'step': 15976, 'epoch': 3} {'type': 'loss', 'content': 0.17778341472148895, 'timestamp': '2025-10-01 04:32:43.055351', 'step': 15977, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:43.085742', 'step': 15977, 'epoch': 3} {'type': 'loss', 'content': 0.08036229759454727, 'timestamp': '2025-10-01 04:32:43.088185', 'step': 15978, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:43.120878', 'step': 15978, 'epoch': 3} {'type': 'loss', 'content': 0.07900287955999374, 'timestamp': '2025-10-01 04:32:43.124228', 'step': 15979, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:43.154986', 'step': 15979, 'epoch': 3} {'type': 'loss', 'content': 0.08830814063549042, 'timestamp': '2025-10-01 04:32:43.178836', 'step': 15980, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:43.209232', 'step': 15980, 'epoch': 3} {'type': 'loss', 'content': 0.18098217248916626, 'timestamp': '2025-10-01 04:32:43.211282', 'step': 15981, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:43.241701', 'step': 15981, 'epoch': 3} {'type': 'loss', 'content': 0.060800567269325256, 'timestamp': '2025-10-01 04:32:43.243913', 'step': 15982, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:43.274721', 'step': 15982, 'epoch': 3} {'type': 'loss', 'content': 0.03501882776618004, 'timestamp': '2025-10-01 04:32:43.277006', 'step': 15983, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:43.307631', 'step': 15983, 'epoch': 3} {'type': 'loss', 'content': 0.07886672019958496, 'timestamp': '2025-10-01 04:32:43.332941', 'step': 15984, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:43.362922', 'step': 15984, 'epoch': 3} {'type': 'loss', 'content': 0.0882325991988182, 'timestamp': '2025-10-01 04:32:43.365267', 'step': 15985, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:43.395697', 'step': 15985, 'epoch': 3} {'type': 'loss', 'content': 0.09130926430225372, 'timestamp': '2025-10-01 04:32:43.397797', 'step': 15986, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:32:43.428370', 'step': 15986, 'epoch': 3} {'type': 'loss', 'content': 0.06441452354192734, 'timestamp': '2025-10-01 04:32:43.430804', 'step': 15987, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:43.460828', 'step': 15987, 'epoch': 3} {'type': 'loss', 'content': 0.10690519213676453, 'timestamp': '2025-10-01 04:32:43.484514', 'step': 15988, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:43.515240', 'step': 15988, 'epoch': 3} {'type': 'loss', 'content': 0.10596867650747299, 'timestamp': '2025-10-01 04:32:43.517506', 'step': 15989, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:43.550717', 'step': 15989, 'epoch': 3} {'type': 'loss', 'content': 0.07466228306293488, 'timestamp': '2025-10-01 04:32:43.568059', 'step': 15990, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:43.598411', 'step': 15990, 'epoch': 3} {'type': 'loss', 'content': 0.08757434785366058, 'timestamp': '2025-10-01 04:32:43.600535', 'step': 15991, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:43.632283', 'step': 15991, 'epoch': 3} {'type': 'loss', 'content': 0.0330226868391037, 'timestamp': '2025-10-01 04:32:43.655822', 'step': 15992, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:43.690278', 'step': 15992, 'epoch': 3} {'type': 'loss', 'content': 0.10434646904468536, 'timestamp': '2025-10-01 04:32:43.692326', 'step': 15993, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:43.723176', 'step': 15993, 'epoch': 3} {'type': 'loss', 'content': 0.07873371243476868, 'timestamp': '2025-10-01 04:32:43.725514', 'step': 15994, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:43.756553', 'step': 15994, 'epoch': 3} {'type': 'loss', 'content': 0.07513735443353653, 'timestamp': '2025-10-01 04:32:43.758902', 'step': 15995, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:43.790295', 'step': 15995, 'epoch': 3} {'type': 'loss', 'content': 0.08434441685676575, 'timestamp': '2025-10-01 04:32:43.814138', 'step': 15996, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:43.844949', 'step': 15996, 'epoch': 3} {'type': 'loss', 'content': 0.11730138957500458, 'timestamp': '2025-10-01 04:32:43.847269', 'step': 15997, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:43.878122', 'step': 15997, 'epoch': 3} {'type': 'loss', 'content': 0.033176857978105545, 'timestamp': '2025-10-01 04:32:43.880281', 'step': 15998, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:43.911339', 'step': 15998, 'epoch': 3} {'type': 'loss', 'content': 0.02473016269505024, 'timestamp': '2025-10-01 04:32:43.913559', 'step': 15999, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:43.945007', 'step': 15999, 'epoch': 3} {'type': 'loss', 'content': 0.14350175857543945, 'timestamp': '2025-10-01 04:32:43.968667', 'step': 16000, 'epoch': 3} {'type': 'info', 'content': 'Checkpoint saved at step 16000', 'timestamp': '2025-10-01 04:32:49.411053', 'step': 16000, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:32:49.446314', 'step': 16000, 'epoch': 3} {'type': 'loss', 'content': 0.04002358391880989, 'timestamp': '2025-10-01 04:32:49.448426', 'step': 16001, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:49.480684', 'step': 16001, 'epoch': 3} {'type': 'loss', 'content': 0.020811134949326515, 'timestamp': '2025-10-01 04:32:49.483112', 'step': 16002, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:49.523392', 'step': 16002, 'epoch': 3} {'type': 'loss', 'content': 0.09989362210035324, 'timestamp': '2025-10-01 04:32:49.525946', 'step': 16003, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:49.558757', 'step': 16003, 'epoch': 3} {'type': 'loss', 'content': 0.059073932468891144, 'timestamp': '2025-10-01 04:32:49.582933', 'step': 16004, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:49.633628', 'step': 16004, 'epoch': 3} {'type': 'loss', 'content': 0.07855898141860962, 'timestamp': '2025-10-01 04:32:49.636922', 'step': 16005, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:32:49.669267', 'step': 16005, 'epoch': 3} {'type': 'loss', 'content': 0.07118318229913712, 'timestamp': '2025-10-01 04:32:49.671716', 'step': 16006, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:49.702801', 'step': 16006, 'epoch': 3} {'type': 'loss', 'content': 0.08080077171325684, 'timestamp': '2025-10-01 04:32:49.704951', 'step': 16007, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:49.745283', 'step': 16007, 'epoch': 3} {'type': 'loss', 'content': 0.05768962576985359, 'timestamp': '2025-10-01 04:32:49.769513', 'step': 16008, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:32:49.809584', 'step': 16008, 'epoch': 3} {'type': 'loss', 'content': 0.03877461329102516, 'timestamp': '2025-10-01 04:32:49.812235', 'step': 16009, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:49.845371', 'step': 16009, 'epoch': 3} {'type': 'loss', 'content': 0.14251743257045746, 'timestamp': '2025-10-01 04:32:49.847543', 'step': 16010, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:49.882468', 'step': 16010, 'epoch': 3} {'type': 'loss', 'content': 0.0646207332611084, 'timestamp': '2025-10-01 04:32:49.885219', 'step': 16011, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:32:49.917169', 'step': 16011, 'epoch': 3} {'type': 'loss', 'content': 0.06489545851945877, 'timestamp': '2025-10-01 04:32:49.941087', 'step': 16012, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:49.991464', 'step': 16012, 'epoch': 3} {'type': 'loss', 'content': 0.0902477502822876, 'timestamp': '2025-10-01 04:32:49.993963', 'step': 16013, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:50.027096', 'step': 16013, 'epoch': 3} {'type': 'loss', 'content': 0.047562118619680405, 'timestamp': '2025-10-01 04:32:50.029296', 'step': 16014, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:50.061519', 'step': 16014, 'epoch': 3} {'type': 'loss', 'content': 0.05464288964867592, 'timestamp': '2025-10-01 04:32:50.068077', 'step': 16015, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:50.100201', 'step': 16015, 'epoch': 3} {'type': 'loss', 'content': 0.06846524029970169, 'timestamp': '2025-10-01 04:32:50.123895', 'step': 16016, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:32:50.155721', 'step': 16016, 'epoch': 3} {'type': 'loss', 'content': 0.1281871348619461, 'timestamp': '2025-10-01 04:32:50.157908', 'step': 16017, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:32:50.192098', 'step': 16017, 'epoch': 3} {'type': 'loss', 'content': 0.08183647692203522, 'timestamp': '2025-10-01 04:32:50.194897', 'step': 16018, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:50.226539', 'step': 16018, 'epoch': 3} {'type': 'loss', 'content': 0.09335669130086899, 'timestamp': '2025-10-01 04:32:50.228915', 'step': 16019, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:50.261887', 'step': 16019, 'epoch': 3} {'type': 'loss', 'content': 0.05773396044969559, 'timestamp': '2025-10-01 04:32:50.285928', 'step': 16020, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:32:50.319874', 'step': 16020, 'epoch': 3} {'type': 'loss', 'content': 0.03289220854640007, 'timestamp': '2025-10-01 04:32:50.321886', 'step': 16021, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:50.354041', 'step': 16021, 'epoch': 3} {'type': 'loss', 'content': 0.04761048033833504, 'timestamp': '2025-10-01 04:32:50.356142', 'step': 16022, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:32:50.388669', 'step': 16022, 'epoch': 3} {'type': 'loss', 'content': 0.090416319668293, 'timestamp': '2025-10-01 04:32:50.391474', 'step': 16023, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:50.423720', 'step': 16023, 'epoch': 3} {'type': 'loss', 'content': 0.06284727901220322, 'timestamp': '2025-10-01 04:32:50.447297', 'step': 16024, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:50.482314', 'step': 16024, 'epoch': 3} {'type': 'loss', 'content': 0.05011885240674019, 'timestamp': '2025-10-01 04:32:50.484365', 'step': 16025, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:50.518510', 'step': 16025, 'epoch': 3} {'type': 'loss', 'content': 0.056484855711460114, 'timestamp': '2025-10-01 04:32:50.524218', 'step': 16026, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:50.559981', 'step': 16026, 'epoch': 3} {'type': 'loss', 'content': 0.08346434682607651, 'timestamp': '2025-10-01 04:32:50.562052', 'step': 16027, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:32:50.598998', 'step': 16027, 'epoch': 3} {'type': 'loss', 'content': 0.1111518144607544, 'timestamp': '2025-10-01 04:32:50.622989', 'step': 16028, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:50.660344', 'step': 16028, 'epoch': 3} {'type': 'loss', 'content': 0.04615040495991707, 'timestamp': '2025-10-01 04:32:50.662434', 'step': 16029, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:50.697717', 'step': 16029, 'epoch': 3} {'type': 'loss', 'content': 0.07100223749876022, 'timestamp': '2025-10-01 04:32:50.699773', 'step': 16030, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:32:50.732129', 'step': 16030, 'epoch': 3} {'type': 'loss', 'content': 0.10625613480806351, 'timestamp': '2025-10-01 04:32:50.734620', 'step': 16031, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:32:50.767891', 'step': 16031, 'epoch': 3} {'type': 'loss', 'content': 0.07638303190469742, 'timestamp': '2025-10-01 04:32:50.791583', 'step': 16032, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:50.831194', 'step': 16032, 'epoch': 3} {'type': 'loss', 'content': 0.1047491505742073, 'timestamp': '2025-10-01 04:32:50.833244', 'step': 16033, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:50.869611', 'step': 16033, 'epoch': 3} {'type': 'loss', 'content': 0.06531540304422379, 'timestamp': '2025-10-01 04:32:50.871806', 'step': 16034, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:32:50.906270', 'step': 16034, 'epoch': 3} {'type': 'loss', 'content': 0.06946887820959091, 'timestamp': '2025-10-01 04:32:50.908227', 'step': 16035, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:50.941495', 'step': 16035, 'epoch': 3} {'type': 'loss', 'content': 0.06363682448863983, 'timestamp': '2025-10-01 04:32:50.965601', 'step': 16036, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:51.005347', 'step': 16036, 'epoch': 3} {'type': 'loss', 'content': 0.05077754333615303, 'timestamp': '2025-10-01 04:32:51.007355', 'step': 16037, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:51.040859', 'step': 16037, 'epoch': 3} {'type': 'loss', 'content': 0.17843297123908997, 'timestamp': '2025-10-01 04:32:51.042756', 'step': 16038, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:51.084368', 'step': 16038, 'epoch': 3} {'type': 'loss', 'content': 0.11502178758382797, 'timestamp': '2025-10-01 04:32:51.086512', 'step': 16039, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:51.125919', 'step': 16039, 'epoch': 3} {'type': 'loss', 'content': 0.06697217375040054, 'timestamp': '2025-10-01 04:32:51.149867', 'step': 16040, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:51.182417', 'step': 16040, 'epoch': 3} {'type': 'loss', 'content': 0.12151385098695755, 'timestamp': '2025-10-01 04:32:51.184653', 'step': 16041, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:51.227295', 'step': 16041, 'epoch': 3} {'type': 'loss', 'content': 0.12238269299268723, 'timestamp': '2025-10-01 04:32:51.229471', 'step': 16042, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:51.273917', 'step': 16042, 'epoch': 3} {'type': 'loss', 'content': 0.06463254988193512, 'timestamp': '2025-10-01 04:32:51.275986', 'step': 16043, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:51.319540', 'step': 16043, 'epoch': 3} {'type': 'loss', 'content': 0.14661316573619843, 'timestamp': '2025-10-01 04:32:51.343192', 'step': 16044, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:51.376100', 'step': 16044, 'epoch': 3} {'type': 'loss', 'content': 0.07634815573692322, 'timestamp': '2025-10-01 04:32:51.378135', 'step': 16045, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:51.412061', 'step': 16045, 'epoch': 3} {'type': 'loss', 'content': 0.08947473019361496, 'timestamp': '2025-10-01 04:32:51.414112', 'step': 16046, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:32:51.446302', 'step': 16046, 'epoch': 3} {'type': 'loss', 'content': 0.09177545458078384, 'timestamp': '2025-10-01 04:32:51.449099', 'step': 16047, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:51.487502', 'step': 16047, 'epoch': 3} {'type': 'loss', 'content': 0.07997708767652512, 'timestamp': '2025-10-01 04:32:51.511243', 'step': 16048, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:51.545097', 'step': 16048, 'epoch': 3} {'type': 'loss', 'content': 0.07842777669429779, 'timestamp': '2025-10-01 04:32:51.547074', 'step': 16049, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:51.580264', 'step': 16049, 'epoch': 3} {'type': 'loss', 'content': 0.06788840889930725, 'timestamp': '2025-10-01 04:32:51.582373', 'step': 16050, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:32:51.626848', 'step': 16050, 'epoch': 3} {'type': 'loss', 'content': 0.12107968330383301, 'timestamp': '2025-10-01 04:32:51.628981', 'step': 16051, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:51.661903', 'step': 16051, 'epoch': 3} {'type': 'loss', 'content': 0.0653083324432373, 'timestamp': '2025-10-01 04:32:51.685644', 'step': 16052, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:51.721427', 'step': 16052, 'epoch': 3} {'type': 'loss', 'content': 0.14647844433784485, 'timestamp': '2025-10-01 04:32:51.723430', 'step': 16053, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:51.761920', 'step': 16053, 'epoch': 3} {'type': 'loss', 'content': 0.022716118022799492, 'timestamp': '2025-10-01 04:32:51.763984', 'step': 16054, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:32:51.796508', 'step': 16054, 'epoch': 3} {'type': 'loss', 'content': 0.08015364408493042, 'timestamp': '2025-10-01 04:32:51.798607', 'step': 16055, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:51.831439', 'step': 16055, 'epoch': 3} {'type': 'loss', 'content': 0.1797126978635788, 'timestamp': '2025-10-01 04:32:51.855105', 'step': 16056, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:51.897257', 'step': 16056, 'epoch': 3} {'type': 'loss', 'content': 0.05411701649427414, 'timestamp': '2025-10-01 04:32:51.899359', 'step': 16057, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:51.932640', 'step': 16057, 'epoch': 3} {'type': 'loss', 'content': 0.09289774298667908, 'timestamp': '2025-10-01 04:32:51.934817', 'step': 16058, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:32:51.968652', 'step': 16058, 'epoch': 3} {'type': 'loss', 'content': 0.06966986507177353, 'timestamp': '2025-10-01 04:32:51.970777', 'step': 16059, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:52.004844', 'step': 16059, 'epoch': 3} {'type': 'loss', 'content': 0.0643841028213501, 'timestamp': '2025-10-01 04:32:52.028563', 'step': 16060, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:52.077089', 'step': 16060, 'epoch': 3} {'type': 'loss', 'content': 0.06361924856901169, 'timestamp': '2025-10-01 04:32:52.079435', 'step': 16061, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:52.119010', 'step': 16061, 'epoch': 3} {'type': 'loss', 'content': 0.055862318724393845, 'timestamp': '2025-10-01 04:32:52.121242', 'step': 16062, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:32:52.155765', 'step': 16062, 'epoch': 3} {'type': 'loss', 'content': 0.07292087376117706, 'timestamp': '2025-10-01 04:32:52.157956', 'step': 16063, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:52.199810', 'step': 16063, 'epoch': 3} {'type': 'loss', 'content': 0.10630207508802414, 'timestamp': '2025-10-01 04:32:52.223723', 'step': 16064, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:32:52.257568', 'step': 16064, 'epoch': 3} {'type': 'loss', 'content': 0.0878666490316391, 'timestamp': '2025-10-01 04:32:52.259870', 'step': 16065, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:52.294597', 'step': 16065, 'epoch': 3} {'type': 'loss', 'content': 0.03756851702928543, 'timestamp': '2025-10-01 04:32:52.296851', 'step': 16066, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:52.330878', 'step': 16066, 'epoch': 3} {'type': 'loss', 'content': 0.0537395104765892, 'timestamp': '2025-10-01 04:32:52.333311', 'step': 16067, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:52.365773', 'step': 16067, 'epoch': 3} {'type': 'loss', 'content': 0.045340802520513535, 'timestamp': '2025-10-01 04:32:52.389905', 'step': 16068, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:52.424757', 'step': 16068, 'epoch': 3} {'type': 'loss', 'content': 0.10629193484783173, 'timestamp': '2025-10-01 04:32:52.427132', 'step': 16069, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:52.464111', 'step': 16069, 'epoch': 3} {'type': 'loss', 'content': 0.08712445199489594, 'timestamp': '2025-10-01 04:32:52.468137', 'step': 16070, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:52.502641', 'step': 16070, 'epoch': 3} {'type': 'loss', 'content': 0.08109458535909653, 'timestamp': '2025-10-01 04:32:52.504893', 'step': 16071, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:52.539209', 'step': 16071, 'epoch': 3} {'type': 'loss', 'content': 0.04350699111819267, 'timestamp': '2025-10-01 04:32:52.563172', 'step': 16072, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:52.596544', 'step': 16072, 'epoch': 3} {'type': 'loss', 'content': 0.060932449996471405, 'timestamp': '2025-10-01 04:32:52.598871', 'step': 16073, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:52.631086', 'step': 16073, 'epoch': 3} {'type': 'loss', 'content': 0.06689850986003876, 'timestamp': '2025-10-01 04:32:52.634898', 'step': 16074, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:52.674217', 'step': 16074, 'epoch': 3} {'type': 'loss', 'content': 0.06997022777795792, 'timestamp': '2025-10-01 04:32:52.676626', 'step': 16075, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-10-01 04:32:52.714459', 'step': 16075, 'epoch': 3} {'type': 'loss', 'content': 0.043124668300151825, 'timestamp': '2025-10-01 04:32:52.741421', 'step': 16076, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:32:52.780920', 'step': 16076, 'epoch': 3} {'type': 'loss', 'content': 0.09066609293222427, 'timestamp': '2025-10-01 04:32:52.784700', 'step': 16077, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:52.820181', 'step': 16077, 'epoch': 3} {'type': 'loss', 'content': 0.09531549364328384, 'timestamp': '2025-10-01 04:32:52.823404', 'step': 16078, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:52.871746', 'step': 16078, 'epoch': 3} {'type': 'loss', 'content': 0.0262437891215086, 'timestamp': '2025-10-01 04:32:52.873814', 'step': 16079, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:52.906465', 'step': 16079, 'epoch': 3} {'type': 'loss', 'content': 0.03878013417124748, 'timestamp': '2025-10-01 04:32:52.930584', 'step': 16080, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:52.967908', 'step': 16080, 'epoch': 3} {'type': 'loss', 'content': 0.09715094417333603, 'timestamp': '2025-10-01 04:32:52.970091', 'step': 16081, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:32:53.004913', 'step': 16081, 'epoch': 3} {'type': 'loss', 'content': 0.11164762079715729, 'timestamp': '2025-10-01 04:32:53.007362', 'step': 16082, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:53.039705', 'step': 16082, 'epoch': 3} {'type': 'loss', 'content': 0.03707888722419739, 'timestamp': '2025-10-01 04:32:53.041770', 'step': 16083, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:32:53.073883', 'step': 16083, 'epoch': 3} {'type': 'loss', 'content': 0.020013507455587387, 'timestamp': '2025-10-01 04:32:53.097500', 'step': 16084, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:53.129799', 'step': 16084, 'epoch': 3} {'type': 'loss', 'content': 0.09470891952514648, 'timestamp': '2025-10-01 04:32:53.131628', 'step': 16085, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:53.165332', 'step': 16085, 'epoch': 3} {'type': 'loss', 'content': 0.06469636410474777, 'timestamp': '2025-10-01 04:32:53.167501', 'step': 16086, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:53.199414', 'step': 16086, 'epoch': 3} {'type': 'loss', 'content': 0.08208701759576797, 'timestamp': '2025-10-01 04:32:53.201760', 'step': 16087, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:53.232846', 'step': 16087, 'epoch': 3} {'type': 'loss', 'content': 0.034829601645469666, 'timestamp': '2025-10-01 04:32:53.256409', 'step': 16088, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:53.291271', 'step': 16088, 'epoch': 3} {'type': 'loss', 'content': 0.050539981573820114, 'timestamp': '2025-10-01 04:32:53.293338', 'step': 16089, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:53.329964', 'step': 16089, 'epoch': 3} {'type': 'loss', 'content': 0.12564349174499512, 'timestamp': '2025-10-01 04:32:53.332150', 'step': 16090, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:53.367931', 'step': 16090, 'epoch': 3} {'type': 'loss', 'content': 0.07876375317573547, 'timestamp': '2025-10-01 04:32:53.370069', 'step': 16091, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:53.404629', 'step': 16091, 'epoch': 3} {'type': 'loss', 'content': 0.09417757391929626, 'timestamp': '2025-10-01 04:32:53.428154', 'step': 16092, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:53.466309', 'step': 16092, 'epoch': 3} {'type': 'loss', 'content': 0.05043242499232292, 'timestamp': '2025-10-01 04:32:53.468616', 'step': 16093, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:32:53.499780', 'step': 16093, 'epoch': 3} {'type': 'loss', 'content': 0.10363247990608215, 'timestamp': '2025-10-01 04:32:53.502108', 'step': 16094, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:53.533713', 'step': 16094, 'epoch': 3} {'type': 'loss', 'content': 0.0984591618180275, 'timestamp': '2025-10-01 04:32:53.536026', 'step': 16095, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:53.569193', 'step': 16095, 'epoch': 3} {'type': 'loss', 'content': 0.12521769106388092, 'timestamp': '2025-10-01 04:32:53.592850', 'step': 16096, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:53.626357', 'step': 16096, 'epoch': 3} {'type': 'loss', 'content': 0.05153805390000343, 'timestamp': '2025-10-01 04:32:53.628365', 'step': 16097, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:53.661088', 'step': 16097, 'epoch': 3} {'type': 'loss', 'content': 0.09152732044458389, 'timestamp': '2025-10-01 04:32:53.663050', 'step': 16098, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:53.695646', 'step': 16098, 'epoch': 3} {'type': 'loss', 'content': 0.09552278369665146, 'timestamp': '2025-10-01 04:32:53.697593', 'step': 16099, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:53.731964', 'step': 16099, 'epoch': 3} {'type': 'loss', 'content': 0.11246034502983093, 'timestamp': '2025-10-01 04:32:53.759548', 'step': 16100, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:53.792211', 'step': 16100, 'epoch': 3} {'type': 'loss', 'content': 0.07262881845235825, 'timestamp': '2025-10-01 04:32:53.794298', 'step': 16101, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:53.826881', 'step': 16101, 'epoch': 3} {'type': 'loss', 'content': 0.07283253967761993, 'timestamp': '2025-10-01 04:32:53.828993', 'step': 16102, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:53.861930', 'step': 16102, 'epoch': 3} {'type': 'loss', 'content': 0.07867860794067383, 'timestamp': '2025-10-01 04:32:53.864038', 'step': 16103, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:32:53.895689', 'step': 16103, 'epoch': 3} {'type': 'loss', 'content': 0.036414455622434616, 'timestamp': '2025-10-01 04:32:53.920389', 'step': 16104, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:32:53.966579', 'step': 16104, 'epoch': 3} {'type': 'loss', 'content': 0.03877422213554382, 'timestamp': '2025-10-01 04:32:53.977006', 'step': 16105, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:54.007928', 'step': 16105, 'epoch': 3} {'type': 'loss', 'content': 0.04121466726064682, 'timestamp': '2025-10-01 04:32:54.010021', 'step': 16106, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:54.041276', 'step': 16106, 'epoch': 3} {'type': 'loss', 'content': 0.10481978952884674, 'timestamp': '2025-10-01 04:32:54.043473', 'step': 16107, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:54.081917', 'step': 16107, 'epoch': 3} {'type': 'loss', 'content': 0.07489451766014099, 'timestamp': '2025-10-01 04:32:54.105518', 'step': 16108, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:54.139030', 'step': 16108, 'epoch': 3} {'type': 'loss', 'content': 0.0395650640130043, 'timestamp': '2025-10-01 04:32:54.141328', 'step': 16109, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:54.187687', 'step': 16109, 'epoch': 3} {'type': 'loss', 'content': 0.049509190022945404, 'timestamp': '2025-10-01 04:32:54.189929', 'step': 16110, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:54.222809', 'step': 16110, 'epoch': 3} {'type': 'loss', 'content': 0.08758411556482315, 'timestamp': '2025-10-01 04:32:54.225192', 'step': 16111, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:54.267829', 'step': 16111, 'epoch': 3} {'type': 'loss', 'content': 0.04426661506295204, 'timestamp': '2025-10-01 04:32:54.291380', 'step': 16112, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:54.322465', 'step': 16112, 'epoch': 3} {'type': 'loss', 'content': 0.11722750216722488, 'timestamp': '2025-10-01 04:32:54.324951', 'step': 16113, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:54.371685', 'step': 16113, 'epoch': 3} {'type': 'loss', 'content': 0.09078405052423477, 'timestamp': '2025-10-01 04:32:54.373737', 'step': 16114, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:54.408443', 'step': 16114, 'epoch': 3} {'type': 'loss', 'content': 0.08616717159748077, 'timestamp': '2025-10-01 04:32:54.410497', 'step': 16115, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:54.457618', 'step': 16115, 'epoch': 3} {'type': 'loss', 'content': 0.045953940600156784, 'timestamp': '2025-10-01 04:32:54.481217', 'step': 16116, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:54.516795', 'step': 16116, 'epoch': 3} {'type': 'loss', 'content': 0.039584286510944366, 'timestamp': '2025-10-01 04:32:54.518854', 'step': 16117, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:54.553408', 'step': 16117, 'epoch': 3} {'type': 'loss', 'content': 0.10352243483066559, 'timestamp': '2025-10-01 04:32:54.559749', 'step': 16118, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:54.599218', 'step': 16118, 'epoch': 3} {'type': 'loss', 'content': 0.03187037259340286, 'timestamp': '2025-10-01 04:32:54.601327', 'step': 16119, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:54.637298', 'step': 16119, 'epoch': 3} {'type': 'loss', 'content': 0.07096564769744873, 'timestamp': '2025-10-01 04:32:54.660862', 'step': 16120, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:54.704507', 'step': 16120, 'epoch': 3} {'type': 'loss', 'content': 0.014848013408482075, 'timestamp': '2025-10-01 04:32:54.706578', 'step': 16121, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:54.742260', 'step': 16121, 'epoch': 3} {'type': 'loss', 'content': 0.03962787613272667, 'timestamp': '2025-10-01 04:32:54.744271', 'step': 16122, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:54.777810', 'step': 16122, 'epoch': 3} {'type': 'loss', 'content': 0.06370483338832855, 'timestamp': '2025-10-01 04:32:54.780211', 'step': 16123, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:54.813883', 'step': 16123, 'epoch': 3} {'type': 'loss', 'content': 0.08453905582427979, 'timestamp': '2025-10-01 04:32:54.837552', 'step': 16124, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:54.868784', 'step': 16124, 'epoch': 3} {'type': 'loss', 'content': 0.08116097748279572, 'timestamp': '2025-10-01 04:32:54.870877', 'step': 16125, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:32:54.902519', 'step': 16125, 'epoch': 3} {'type': 'loss', 'content': 0.16181756556034088, 'timestamp': '2025-10-01 04:32:54.904791', 'step': 16126, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:54.936025', 'step': 16126, 'epoch': 3} {'type': 'loss', 'content': 0.08420625329017639, 'timestamp': '2025-10-01 04:32:54.938078', 'step': 16127, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:54.997686', 'step': 16127, 'epoch': 3} {'type': 'loss', 'content': 0.04107589274644852, 'timestamp': '2025-10-01 04:32:55.021075', 'step': 16128, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:55.052967', 'step': 16128, 'epoch': 3} {'type': 'loss', 'content': 0.0739845409989357, 'timestamp': '2025-10-01 04:32:55.054990', 'step': 16129, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:32:55.085732', 'step': 16129, 'epoch': 3} {'type': 'loss', 'content': 0.13063986599445343, 'timestamp': '2025-10-01 04:32:55.088065', 'step': 16130, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:55.118587', 'step': 16130, 'epoch': 3} {'type': 'loss', 'content': 0.026432111859321594, 'timestamp': '2025-10-01 04:32:55.120874', 'step': 16131, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-10-01 04:32:55.152860', 'step': 16131, 'epoch': 3} {'type': 'loss', 'content': 0.09986823052167892, 'timestamp': '2025-10-01 04:32:55.180907', 'step': 16132, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:55.212204', 'step': 16132, 'epoch': 3} {'type': 'loss', 'content': 0.05691061168909073, 'timestamp': '2025-10-01 04:32:55.214207', 'step': 16133, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:55.245472', 'step': 16133, 'epoch': 3} {'type': 'loss', 'content': 0.05980044975876808, 'timestamp': '2025-10-01 04:32:55.247770', 'step': 16134, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:55.278156', 'step': 16134, 'epoch': 3} {'type': 'loss', 'content': 0.06815250962972641, 'timestamp': '2025-10-01 04:32:55.280156', 'step': 16135, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:55.311915', 'step': 16135, 'epoch': 3} {'type': 'loss', 'content': 0.0818224623799324, 'timestamp': '2025-10-01 04:32:55.335844', 'step': 16136, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:55.366407', 'step': 16136, 'epoch': 3} {'type': 'loss', 'content': 0.06888117641210556, 'timestamp': '2025-10-01 04:32:55.368578', 'step': 16137, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:55.399551', 'step': 16137, 'epoch': 3} {'type': 'loss', 'content': 0.079566590487957, 'timestamp': '2025-10-01 04:32:55.401663', 'step': 16138, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:55.432080', 'step': 16138, 'epoch': 3} {'type': 'loss', 'content': 0.07871106266975403, 'timestamp': '2025-10-01 04:32:55.434405', 'step': 16139, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:32:55.466258', 'step': 16139, 'epoch': 3} {'type': 'loss', 'content': 0.053184617310762405, 'timestamp': '2025-10-01 04:32:55.489871', 'step': 16140, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:55.521307', 'step': 16140, 'epoch': 3} {'type': 'loss', 'content': 0.11162393540143967, 'timestamp': '2025-10-01 04:32:55.523469', 'step': 16141, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:55.553915', 'step': 16141, 'epoch': 3} {'type': 'loss', 'content': 0.048130977898836136, 'timestamp': '2025-10-01 04:32:55.556132', 'step': 16142, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:55.587499', 'step': 16142, 'epoch': 3} {'type': 'loss', 'content': 0.04254823178052902, 'timestamp': '2025-10-01 04:32:55.589752', 'step': 16143, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:55.620244', 'step': 16143, 'epoch': 3} {'type': 'loss', 'content': 0.051582206040620804, 'timestamp': '2025-10-01 04:32:55.643743', 'step': 16144, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:55.684454', 'step': 16144, 'epoch': 3} {'type': 'loss', 'content': 0.06191393360495567, 'timestamp': '2025-10-01 04:32:55.686515', 'step': 16145, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:55.717323', 'step': 16145, 'epoch': 3} {'type': 'loss', 'content': 0.11611325293779373, 'timestamp': '2025-10-01 04:32:55.719646', 'step': 16146, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:55.751245', 'step': 16146, 'epoch': 3} {'type': 'loss', 'content': 0.06979271024465561, 'timestamp': '2025-10-01 04:32:55.753394', 'step': 16147, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:55.786555', 'step': 16147, 'epoch': 3} {'type': 'loss', 'content': 0.07542306929826736, 'timestamp': '2025-10-01 04:32:55.810493', 'step': 16148, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:55.843080', 'step': 16148, 'epoch': 3} {'type': 'loss', 'content': 0.0402521938085556, 'timestamp': '2025-10-01 04:32:55.845061', 'step': 16149, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:55.877503', 'step': 16149, 'epoch': 3} {'type': 'loss', 'content': 0.10201055556535721, 'timestamp': '2025-10-01 04:32:55.879566', 'step': 16150, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:32:55.922504', 'step': 16150, 'epoch': 3} {'type': 'loss', 'content': 0.20201894640922546, 'timestamp': '2025-10-01 04:32:55.924939', 'step': 16151, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:55.962868', 'step': 16151, 'epoch': 3} {'type': 'loss', 'content': 0.11766308546066284, 'timestamp': '2025-10-01 04:32:55.986651', 'step': 16152, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:56.030544', 'step': 16152, 'epoch': 3} {'type': 'loss', 'content': 0.07000100612640381, 'timestamp': '2025-10-01 04:32:56.032877', 'step': 16153, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:56.067451', 'step': 16153, 'epoch': 3} {'type': 'loss', 'content': 0.07987550646066666, 'timestamp': '2025-10-01 04:32:56.069451', 'step': 16154, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:56.104985', 'step': 16154, 'epoch': 3} {'type': 'loss', 'content': 0.0444476455450058, 'timestamp': '2025-10-01 04:32:56.107102', 'step': 16155, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:56.142246', 'step': 16155, 'epoch': 3} {'type': 'loss', 'content': 0.04075683280825615, 'timestamp': '2025-10-01 04:32:56.165742', 'step': 16156, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:56.201091', 'step': 16156, 'epoch': 3} {'type': 'loss', 'content': 0.09185396879911423, 'timestamp': '2025-10-01 04:32:56.203143', 'step': 16157, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-10-01 04:32:56.255859', 'step': 16157, 'epoch': 3} {'type': 'loss', 'content': 0.08607588708400726, 'timestamp': '2025-10-01 04:32:56.266521', 'step': 16158, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:56.300247', 'step': 16158, 'epoch': 3} {'type': 'loss', 'content': 0.07468574494123459, 'timestamp': '2025-10-01 04:32:56.302259', 'step': 16159, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:56.344050', 'step': 16159, 'epoch': 3} {'type': 'loss', 'content': 0.06717438250780106, 'timestamp': '2025-10-01 04:32:56.367582', 'step': 16160, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:56.403661', 'step': 16160, 'epoch': 3} {'type': 'loss', 'content': 0.11478348076343536, 'timestamp': '2025-10-01 04:32:56.405817', 'step': 16161, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:56.443687', 'step': 16161, 'epoch': 3} {'type': 'loss', 'content': 0.08696043491363525, 'timestamp': '2025-10-01 04:32:56.445776', 'step': 16162, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:56.494330', 'step': 16162, 'epoch': 3} {'type': 'loss', 'content': 0.03877761587500572, 'timestamp': '2025-10-01 04:32:56.496440', 'step': 16163, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:56.531079', 'step': 16163, 'epoch': 3} {'type': 'loss', 'content': 0.07109957188367844, 'timestamp': '2025-10-01 04:32:56.554570', 'step': 16164, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:56.587137', 'step': 16164, 'epoch': 3} {'type': 'loss', 'content': 0.07086151838302612, 'timestamp': '2025-10-01 04:32:56.589409', 'step': 16165, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:32:56.623137', 'step': 16165, 'epoch': 3} {'type': 'loss', 'content': 0.03763420134782791, 'timestamp': '2025-10-01 04:32:56.625519', 'step': 16166, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:56.661309', 'step': 16166, 'epoch': 3} {'type': 'loss', 'content': 0.05366288125514984, 'timestamp': '2025-10-01 04:32:56.663673', 'step': 16167, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:56.697575', 'step': 16167, 'epoch': 3} {'type': 'loss', 'content': 0.04979986324906349, 'timestamp': '2025-10-01 04:32:56.721029', 'step': 16168, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:56.753727', 'step': 16168, 'epoch': 3} {'type': 'loss', 'content': 0.045505084097385406, 'timestamp': '2025-10-01 04:32:56.755903', 'step': 16169, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:56.789308', 'step': 16169, 'epoch': 3} {'type': 'loss', 'content': 0.11182096600532532, 'timestamp': '2025-10-01 04:32:56.791433', 'step': 16170, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:56.826565', 'step': 16170, 'epoch': 3} {'type': 'loss', 'content': 0.03975452855229378, 'timestamp': '2025-10-01 04:32:56.829291', 'step': 16171, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:56.864711', 'step': 16171, 'epoch': 3} {'type': 'loss', 'content': 0.06048845127224922, 'timestamp': '2025-10-01 04:32:56.888449', 'step': 16172, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:56.931141', 'step': 16172, 'epoch': 3} {'type': 'loss', 'content': 0.06791727989912033, 'timestamp': '2025-10-01 04:32:56.933111', 'step': 16173, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:56.967337', 'step': 16173, 'epoch': 3} {'type': 'loss', 'content': 0.06156722083687782, 'timestamp': '2025-10-01 04:32:56.969531', 'step': 16174, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:57.003819', 'step': 16174, 'epoch': 3} {'type': 'loss', 'content': 0.051429927349090576, 'timestamp': '2025-10-01 04:32:57.005911', 'step': 16175, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:57.039901', 'step': 16175, 'epoch': 3} {'type': 'loss', 'content': 0.08689144998788834, 'timestamp': '2025-10-01 04:32:57.063620', 'step': 16176, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:57.098029', 'step': 16176, 'epoch': 3} {'type': 'loss', 'content': 0.05100177973508835, 'timestamp': '2025-10-01 04:32:57.100525', 'step': 16177, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:57.133549', 'step': 16177, 'epoch': 3} {'type': 'loss', 'content': 0.1108880564570427, 'timestamp': '2025-10-01 04:32:57.135948', 'step': 16178, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:57.171381', 'step': 16178, 'epoch': 3} {'type': 'loss', 'content': 0.11696339398622513, 'timestamp': '2025-10-01 04:32:57.173480', 'step': 16179, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:57.210270', 'step': 16179, 'epoch': 3} {'type': 'loss', 'content': 0.12985865771770477, 'timestamp': '2025-10-01 04:32:57.233933', 'step': 16180, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:32:57.267703', 'step': 16180, 'epoch': 3} {'type': 'loss', 'content': 0.08572980761528015, 'timestamp': '2025-10-01 04:32:57.270238', 'step': 16181, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:57.303857', 'step': 16181, 'epoch': 3} {'type': 'loss', 'content': 0.15039688348770142, 'timestamp': '2025-10-01 04:32:57.306575', 'step': 16182, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:57.342608', 'step': 16182, 'epoch': 3} {'type': 'loss', 'content': 0.0784091204404831, 'timestamp': '2025-10-01 04:32:57.345022', 'step': 16183, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:57.402131', 'step': 16183, 'epoch': 3} {'type': 'loss', 'content': 0.056290436536073685, 'timestamp': '2025-10-01 04:32:57.426019', 'step': 16184, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:57.457033', 'step': 16184, 'epoch': 3} {'type': 'loss', 'content': 0.053017131984233856, 'timestamp': '2025-10-01 04:32:57.459485', 'step': 16185, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:57.505158', 'step': 16185, 'epoch': 3} {'type': 'loss', 'content': 0.037541963160037994, 'timestamp': '2025-10-01 04:32:57.507622', 'step': 16186, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:57.541056', 'step': 16186, 'epoch': 3} {'type': 'loss', 'content': 0.17699915170669556, 'timestamp': '2025-10-01 04:32:57.543364', 'step': 16187, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:57.587419', 'step': 16187, 'epoch': 3} {'type': 'loss', 'content': 0.018515950068831444, 'timestamp': '2025-10-01 04:32:57.611443', 'step': 16188, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:57.652600', 'step': 16188, 'epoch': 3} {'type': 'loss', 'content': 0.06441055983304977, 'timestamp': '2025-10-01 04:32:57.655092', 'step': 16189, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:57.691408', 'step': 16189, 'epoch': 3} {'type': 'loss', 'content': 0.05961759015917778, 'timestamp': '2025-10-01 04:32:57.693664', 'step': 16190, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:57.727797', 'step': 16190, 'epoch': 3} {'type': 'loss', 'content': 0.09324425458908081, 'timestamp': '2025-10-01 04:32:57.730254', 'step': 16191, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:32:57.765415', 'step': 16191, 'epoch': 3} {'type': 'loss', 'content': 0.036013275384902954, 'timestamp': '2025-10-01 04:32:57.789133', 'step': 16192, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:57.823676', 'step': 16192, 'epoch': 3} {'type': 'loss', 'content': 0.11320768296718597, 'timestamp': '2025-10-01 04:32:57.826302', 'step': 16193, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:57.869509', 'step': 16193, 'epoch': 3} {'type': 'loss', 'content': 0.053760405629873276, 'timestamp': '2025-10-01 04:32:57.872900', 'step': 16194, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:57.905569', 'step': 16194, 'epoch': 3} {'type': 'loss', 'content': 0.04758857190608978, 'timestamp': '2025-10-01 04:32:57.908489', 'step': 16195, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:57.941735', 'step': 16195, 'epoch': 3} {'type': 'loss', 'content': 0.10513854771852493, 'timestamp': '2025-10-01 04:32:57.965696', 'step': 16196, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:57.998903', 'step': 16196, 'epoch': 3} {'type': 'loss', 'content': 0.09143374860286713, 'timestamp': '2025-10-01 04:32:58.001648', 'step': 16197, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:58.036354', 'step': 16197, 'epoch': 3} {'type': 'loss', 'content': 0.06667453795671463, 'timestamp': '2025-10-01 04:32:58.038917', 'step': 16198, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:58.074667', 'step': 16198, 'epoch': 3} {'type': 'loss', 'content': 0.09220750629901886, 'timestamp': '2025-10-01 04:32:58.079333', 'step': 16199, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:58.113813', 'step': 16199, 'epoch': 3} {'type': 'loss', 'content': 0.07628481090068817, 'timestamp': '2025-10-01 04:32:58.137832', 'step': 16200, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:58.171212', 'step': 16200, 'epoch': 3} {'type': 'loss', 'content': 0.0737447738647461, 'timestamp': '2025-10-01 04:32:58.173844', 'step': 16201, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:58.218424', 'step': 16201, 'epoch': 3} {'type': 'loss', 'content': 0.08158448338508606, 'timestamp': '2025-10-01 04:32:58.220781', 'step': 16202, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:58.254538', 'step': 16202, 'epoch': 3} {'type': 'loss', 'content': 0.12076020985841751, 'timestamp': '2025-10-01 04:32:58.256601', 'step': 16203, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:58.297309', 'step': 16203, 'epoch': 3} {'type': 'loss', 'content': 0.11440123617649078, 'timestamp': '2025-10-01 04:32:58.321421', 'step': 16204, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:32:58.354196', 'step': 16204, 'epoch': 3} {'type': 'loss', 'content': 0.024203194305300713, 'timestamp': '2025-10-01 04:32:58.356577', 'step': 16205, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:58.395339', 'step': 16205, 'epoch': 3} {'type': 'loss', 'content': 0.11249023675918579, 'timestamp': '2025-10-01 04:32:58.397758', 'step': 16206, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:58.430651', 'step': 16206, 'epoch': 3} {'type': 'loss', 'content': 0.11103534698486328, 'timestamp': '2025-10-01 04:32:58.432987', 'step': 16207, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:58.473228', 'step': 16207, 'epoch': 3} {'type': 'loss', 'content': 0.041153013706207275, 'timestamp': '2025-10-01 04:32:58.496895', 'step': 16208, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:58.528999', 'step': 16208, 'epoch': 3} {'type': 'loss', 'content': 0.10718890279531479, 'timestamp': '2025-10-01 04:32:58.531492', 'step': 16209, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:58.565570', 'step': 16209, 'epoch': 3} {'type': 'loss', 'content': 0.08998309075832367, 'timestamp': '2025-10-01 04:32:58.567902', 'step': 16210, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:58.600191', 'step': 16210, 'epoch': 3} {'type': 'loss', 'content': 0.10023688524961472, 'timestamp': '2025-10-01 04:32:58.602412', 'step': 16211, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:58.642245', 'step': 16211, 'epoch': 3} {'type': 'loss', 'content': 0.03539657220244408, 'timestamp': '2025-10-01 04:32:58.666570', 'step': 16212, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:58.699041', 'step': 16212, 'epoch': 3} {'type': 'loss', 'content': 0.06538297235965729, 'timestamp': '2025-10-01 04:32:58.701911', 'step': 16213, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:58.735789', 'step': 16213, 'epoch': 3} {'type': 'loss', 'content': 0.06386125087738037, 'timestamp': '2025-10-01 04:32:58.738558', 'step': 16214, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:32:58.772149', 'step': 16214, 'epoch': 3} {'type': 'loss', 'content': 0.05070357024669647, 'timestamp': '2025-10-01 04:32:58.774350', 'step': 16215, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:58.818316', 'step': 16215, 'epoch': 3} {'type': 'loss', 'content': 0.11689843237400055, 'timestamp': '2025-10-01 04:32:58.842090', 'step': 16216, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:58.874856', 'step': 16216, 'epoch': 3} {'type': 'loss', 'content': 0.04065227136015892, 'timestamp': '2025-10-01 04:32:58.877048', 'step': 16217, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:58.908952', 'step': 16217, 'epoch': 3} {'type': 'loss', 'content': 0.07288630306720734, 'timestamp': '2025-10-01 04:32:58.911062', 'step': 16218, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:58.960661', 'step': 16218, 'epoch': 3} {'type': 'loss', 'content': 0.07300090044736862, 'timestamp': '2025-10-01 04:32:58.962776', 'step': 16219, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:59.018380', 'step': 16219, 'epoch': 3} {'type': 'loss', 'content': 0.0420745313167572, 'timestamp': '2025-10-01 04:32:59.041890', 'step': 16220, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:59.087236', 'step': 16220, 'epoch': 3} {'type': 'loss', 'content': 0.1494816243648529, 'timestamp': '2025-10-01 04:32:59.089362', 'step': 16221, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:59.123331', 'step': 16221, 'epoch': 3} {'type': 'loss', 'content': 0.08070577681064606, 'timestamp': '2025-10-01 04:32:59.125824', 'step': 16222, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:59.173247', 'step': 16222, 'epoch': 3} {'type': 'loss', 'content': 0.10931340605020523, 'timestamp': '2025-10-01 04:32:59.176164', 'step': 16223, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:59.213281', 'step': 16223, 'epoch': 3} {'type': 'loss', 'content': 0.090805783867836, 'timestamp': '2025-10-01 04:32:59.237157', 'step': 16224, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:59.273141', 'step': 16224, 'epoch': 3} {'type': 'loss', 'content': 0.13286545872688293, 'timestamp': '2025-10-01 04:32:59.275611', 'step': 16225, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:59.310031', 'step': 16225, 'epoch': 3} {'type': 'loss', 'content': 0.1250218003988266, 'timestamp': '2025-10-01 04:32:59.312181', 'step': 16226, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:59.346041', 'step': 16226, 'epoch': 3} {'type': 'loss', 'content': 0.13372738659381866, 'timestamp': '2025-10-01 04:32:59.348234', 'step': 16227, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:59.381028', 'step': 16227, 'epoch': 3} {'type': 'loss', 'content': 0.07034235447645187, 'timestamp': '2025-10-01 04:32:59.404566', 'step': 16228, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:59.447559', 'step': 16228, 'epoch': 3} {'type': 'loss', 'content': 0.05448194220662117, 'timestamp': '2025-10-01 04:32:59.449765', 'step': 16229, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:32:59.487476', 'step': 16229, 'epoch': 3} {'type': 'loss', 'content': 0.04885341599583626, 'timestamp': '2025-10-01 04:32:59.493550', 'step': 16230, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:32:59.536008', 'step': 16230, 'epoch': 3} {'type': 'loss', 'content': 0.06648989766836166, 'timestamp': '2025-10-01 04:32:59.538313', 'step': 16231, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:32:59.575030', 'step': 16231, 'epoch': 3} {'type': 'loss', 'content': 0.058132998645305634, 'timestamp': '2025-10-01 04:32:59.598998', 'step': 16232, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:59.632389', 'step': 16232, 'epoch': 3} {'type': 'loss', 'content': 0.12180352956056595, 'timestamp': '2025-10-01 04:32:59.639297', 'step': 16233, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:32:59.693427', 'step': 16233, 'epoch': 3} {'type': 'loss', 'content': 0.0707298219203949, 'timestamp': '2025-10-01 04:32:59.695961', 'step': 16234, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:32:59.729956', 'step': 16234, 'epoch': 3} {'type': 'loss', 'content': 0.031875599175691605, 'timestamp': '2025-10-01 04:32:59.732312', 'step': 16235, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:59.765252', 'step': 16235, 'epoch': 3} {'type': 'loss', 'content': 0.10428149253129959, 'timestamp': '2025-10-01 04:32:59.788858', 'step': 16236, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:59.822293', 'step': 16236, 'epoch': 3} {'type': 'loss', 'content': 0.03360062837600708, 'timestamp': '2025-10-01 04:32:59.824363', 'step': 16237, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:59.860078', 'step': 16237, 'epoch': 3} {'type': 'loss', 'content': 0.05758530646562576, 'timestamp': '2025-10-01 04:32:59.862542', 'step': 16238, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:32:59.905857', 'step': 16238, 'epoch': 3} {'type': 'loss', 'content': 0.03611385077238083, 'timestamp': '2025-10-01 04:32:59.908358', 'step': 16239, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:32:59.948657', 'step': 16239, 'epoch': 3} {'type': 'loss', 'content': 0.018424494192004204, 'timestamp': '2025-10-01 04:32:59.972707', 'step': 16240, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:00.006292', 'step': 16240, 'epoch': 3} {'type': 'loss', 'content': 0.11270812153816223, 'timestamp': '2025-10-01 04:33:00.008464', 'step': 16241, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:33:00.043613', 'step': 16241, 'epoch': 3} {'type': 'loss', 'content': 0.051970016211271286, 'timestamp': '2025-10-01 04:33:00.048085', 'step': 16242, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:00.093577', 'step': 16242, 'epoch': 3} {'type': 'loss', 'content': 0.06933093070983887, 'timestamp': '2025-10-01 04:33:00.095791', 'step': 16243, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:33:00.131137', 'step': 16243, 'epoch': 3} {'type': 'loss', 'content': 0.09081624448299408, 'timestamp': '2025-10-01 04:33:00.154711', 'step': 16244, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:00.188304', 'step': 16244, 'epoch': 3} {'type': 'loss', 'content': 0.08122185617685318, 'timestamp': '2025-10-01 04:33:00.190310', 'step': 16245, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:00.224817', 'step': 16245, 'epoch': 3} {'type': 'loss', 'content': 0.0512678399682045, 'timestamp': '2025-10-01 04:33:00.227037', 'step': 16246, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:00.260201', 'step': 16246, 'epoch': 3} {'type': 'loss', 'content': 0.09717204421758652, 'timestamp': '2025-10-01 04:33:00.262271', 'step': 16247, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:00.294915', 'step': 16247, 'epoch': 3} {'type': 'loss', 'content': 0.06072545796632767, 'timestamp': '2025-10-01 04:33:00.318516', 'step': 16248, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:00.350930', 'step': 16248, 'epoch': 3} {'type': 'loss', 'content': 0.09030472487211227, 'timestamp': '2025-10-01 04:33:00.353039', 'step': 16249, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:00.387924', 'step': 16249, 'epoch': 3} {'type': 'loss', 'content': 0.02863132581114769, 'timestamp': '2025-10-01 04:33:00.390112', 'step': 16250, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:00.435517', 'step': 16250, 'epoch': 3} {'type': 'loss', 'content': 0.08505316078662872, 'timestamp': '2025-10-01 04:33:00.437646', 'step': 16251, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:00.475599', 'step': 16251, 'epoch': 3} {'type': 'loss', 'content': 0.1226101964712143, 'timestamp': '2025-10-01 04:33:00.499247', 'step': 16252, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:00.536147', 'step': 16252, 'epoch': 3} {'type': 'loss', 'content': 0.08422978222370148, 'timestamp': '2025-10-01 04:33:00.538584', 'step': 16253, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:33:00.571729', 'step': 16253, 'epoch': 3} {'type': 'loss', 'content': 0.06629779189825058, 'timestamp': '2025-10-01 04:33:00.574180', 'step': 16254, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:00.609008', 'step': 16254, 'epoch': 3} {'type': 'loss', 'content': 0.08469251543283463, 'timestamp': '2025-10-01 04:33:00.611530', 'step': 16255, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:00.645205', 'step': 16255, 'epoch': 3} {'type': 'loss', 'content': 0.05044332519173622, 'timestamp': '2025-10-01 04:33:00.668951', 'step': 16256, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:33:00.704305', 'step': 16256, 'epoch': 3} {'type': 'loss', 'content': 0.06995213776826859, 'timestamp': '2025-10-01 04:33:00.706425', 'step': 16257, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:00.740069', 'step': 16257, 'epoch': 3} {'type': 'loss', 'content': 0.035448070615530014, 'timestamp': '2025-10-01 04:33:00.742173', 'step': 16258, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:00.776849', 'step': 16258, 'epoch': 3} {'type': 'loss', 'content': 0.10600577294826508, 'timestamp': '2025-10-01 04:33:00.778994', 'step': 16259, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:00.818984', 'step': 16259, 'epoch': 3} {'type': 'loss', 'content': 0.028358928859233856, 'timestamp': '2025-10-01 04:33:00.842595', 'step': 16260, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:00.883335', 'step': 16260, 'epoch': 3} {'type': 'loss', 'content': 0.07713396847248077, 'timestamp': '2025-10-01 04:33:00.885768', 'step': 16261, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:00.919767', 'step': 16261, 'epoch': 3} {'type': 'loss', 'content': 0.07701745629310608, 'timestamp': '2025-10-01 04:33:00.922071', 'step': 16262, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:00.959812', 'step': 16262, 'epoch': 3} {'type': 'loss', 'content': 0.03489881753921509, 'timestamp': '2025-10-01 04:33:00.961950', 'step': 16263, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:00.995311', 'step': 16263, 'epoch': 3} {'type': 'loss', 'content': 0.04354573413729668, 'timestamp': '2025-10-01 04:33:01.019017', 'step': 16264, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:01.051093', 'step': 16264, 'epoch': 3} {'type': 'loss', 'content': 0.1108088567852974, 'timestamp': '2025-10-01 04:33:01.053239', 'step': 16265, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:01.087904', 'step': 16265, 'epoch': 3} {'type': 'loss', 'content': 0.023542441427707672, 'timestamp': '2025-10-01 04:33:01.090147', 'step': 16266, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:01.123852', 'step': 16266, 'epoch': 3} {'type': 'loss', 'content': 0.0908067598938942, 'timestamp': '2025-10-01 04:33:01.126075', 'step': 16267, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:01.161083', 'step': 16267, 'epoch': 3} {'type': 'loss', 'content': 0.13486219942569733, 'timestamp': '2025-10-01 04:33:01.184891', 'step': 16268, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:01.216968', 'step': 16268, 'epoch': 3} {'type': 'loss', 'content': 0.13826113939285278, 'timestamp': '2025-10-01 04:33:01.219382', 'step': 16269, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:33:01.251424', 'step': 16269, 'epoch': 3} {'type': 'loss', 'content': 0.05497618392109871, 'timestamp': '2025-10-01 04:33:01.253755', 'step': 16270, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:01.288221', 'step': 16270, 'epoch': 3} {'type': 'loss', 'content': 0.09639307111501694, 'timestamp': '2025-10-01 04:33:01.290757', 'step': 16271, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:01.323464', 'step': 16271, 'epoch': 3} {'type': 'loss', 'content': 0.04126385226845741, 'timestamp': '2025-10-01 04:33:01.359584', 'step': 16272, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:01.391994', 'step': 16272, 'epoch': 3} {'type': 'loss', 'content': 0.0815431997179985, 'timestamp': '2025-10-01 04:33:01.394075', 'step': 16273, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:01.428683', 'step': 16273, 'epoch': 3} {'type': 'loss', 'content': 0.08481559157371521, 'timestamp': '2025-10-01 04:33:01.430909', 'step': 16274, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:01.462471', 'step': 16274, 'epoch': 3} {'type': 'loss', 'content': 0.12588690221309662, 'timestamp': '2025-10-01 04:33:01.464699', 'step': 16275, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:01.498623', 'step': 16275, 'epoch': 3} {'type': 'loss', 'content': 0.04879793897271156, 'timestamp': '2025-10-01 04:33:01.522341', 'step': 16276, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:33:01.552944', 'step': 16276, 'epoch': 3} {'type': 'loss', 'content': 0.09174375236034393, 'timestamp': '2025-10-01 04:33:01.555407', 'step': 16277, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:01.587001', 'step': 16277, 'epoch': 3} {'type': 'loss', 'content': 0.11637857556343079, 'timestamp': '2025-10-01 04:33:01.589740', 'step': 16278, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:33:01.624843', 'step': 16278, 'epoch': 3} {'type': 'loss', 'content': 0.1404280960559845, 'timestamp': '2025-10-01 04:33:01.627214', 'step': 16279, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:01.658735', 'step': 16279, 'epoch': 3} {'type': 'loss', 'content': 0.09549432247877121, 'timestamp': '2025-10-01 04:33:01.682525', 'step': 16280, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:01.715611', 'step': 16280, 'epoch': 3} {'type': 'loss', 'content': 0.054904066026210785, 'timestamp': '2025-10-01 04:33:01.717774', 'step': 16281, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:01.755270', 'step': 16281, 'epoch': 3} {'type': 'loss', 'content': 0.04647887498140335, 'timestamp': '2025-10-01 04:33:01.757874', 'step': 16282, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:33:01.790983', 'step': 16282, 'epoch': 3} {'type': 'loss', 'content': 0.06725285202264786, 'timestamp': '2025-10-01 04:33:01.794146', 'step': 16283, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:01.833107', 'step': 16283, 'epoch': 3} {'type': 'loss', 'content': 0.10449580103158951, 'timestamp': '2025-10-01 04:33:01.856719', 'step': 16284, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:01.886925', 'step': 16284, 'epoch': 3} {'type': 'loss', 'content': 0.042284365743398666, 'timestamp': '2025-10-01 04:33:01.889316', 'step': 16285, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:33:01.922585', 'step': 16285, 'epoch': 3} {'type': 'loss', 'content': 0.07086893916130066, 'timestamp': '2025-10-01 04:33:01.925456', 'step': 16286, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:01.957000', 'step': 16286, 'epoch': 3} {'type': 'loss', 'content': 0.12580400705337524, 'timestamp': '2025-10-01 04:33:01.959444', 'step': 16287, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:01.990083', 'step': 16287, 'epoch': 3} {'type': 'loss', 'content': 0.056680772453546524, 'timestamp': '2025-10-01 04:33:02.013816', 'step': 16288, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:02.045224', 'step': 16288, 'epoch': 3} {'type': 'loss', 'content': 0.05876580625772476, 'timestamp': '2025-10-01 04:33:02.047594', 'step': 16289, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:02.080073', 'step': 16289, 'epoch': 3} {'type': 'loss', 'content': 0.06046917662024498, 'timestamp': '2025-10-01 04:33:02.082536', 'step': 16290, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:02.115812', 'step': 16290, 'epoch': 3} {'type': 'loss', 'content': 0.07346098124980927, 'timestamp': '2025-10-01 04:33:02.117993', 'step': 16291, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:02.152763', 'step': 16291, 'epoch': 3} {'type': 'loss', 'content': 0.06431739777326584, 'timestamp': '2025-10-01 04:33:02.176512', 'step': 16292, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:02.208884', 'step': 16292, 'epoch': 3} {'type': 'loss', 'content': 0.006570752244442701, 'timestamp': '2025-10-01 04:33:02.212173', 'step': 16293, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:02.244514', 'step': 16293, 'epoch': 3} {'type': 'loss', 'content': 0.10744775086641312, 'timestamp': '2025-10-01 04:33:02.246825', 'step': 16294, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:33:02.279509', 'step': 16294, 'epoch': 3} {'type': 'loss', 'content': 0.035743895918130875, 'timestamp': '2025-10-01 04:33:02.282573', 'step': 16295, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:02.314286', 'step': 16295, 'epoch': 3} {'type': 'loss', 'content': 0.08301278203725815, 'timestamp': '2025-10-01 04:33:02.337846', 'step': 16296, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:33:02.369628', 'step': 16296, 'epoch': 3} {'type': 'loss', 'content': 0.056784678250551224, 'timestamp': '2025-10-01 04:33:02.371887', 'step': 16297, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:02.402803', 'step': 16297, 'epoch': 3} {'type': 'loss', 'content': 0.12389395385980606, 'timestamp': '2025-10-01 04:33:02.405625', 'step': 16298, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:02.436991', 'step': 16298, 'epoch': 3} {'type': 'loss', 'content': 0.07377488166093826, 'timestamp': '2025-10-01 04:33:02.439639', 'step': 16299, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:02.475872', 'step': 16299, 'epoch': 3} {'type': 'loss', 'content': 0.047427184879779816, 'timestamp': '2025-10-01 04:33:02.499589', 'step': 16300, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:33:02.530370', 'step': 16300, 'epoch': 3} {'type': 'loss', 'content': 0.0620969757437706, 'timestamp': '2025-10-01 04:33:02.532765', 'step': 16301, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:02.565474', 'step': 16301, 'epoch': 3} {'type': 'loss', 'content': 0.07694680243730545, 'timestamp': '2025-10-01 04:33:02.567629', 'step': 16302, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:02.598287', 'step': 16302, 'epoch': 3} {'type': 'loss', 'content': 0.05873648449778557, 'timestamp': '2025-10-01 04:33:02.600857', 'step': 16303, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:02.635958', 'step': 16303, 'epoch': 3} {'type': 'loss', 'content': 0.019287366420030594, 'timestamp': '2025-10-01 04:33:02.659719', 'step': 16304, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:02.691613', 'step': 16304, 'epoch': 3} {'type': 'loss', 'content': 0.113649882376194, 'timestamp': '2025-10-01 04:33:02.693740', 'step': 16305, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:33:02.725296', 'step': 16305, 'epoch': 3} {'type': 'loss', 'content': 0.123084157705307, 'timestamp': '2025-10-01 04:33:02.728116', 'step': 16306, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:02.760209', 'step': 16306, 'epoch': 3} {'type': 'loss', 'content': 0.05153883621096611, 'timestamp': '2025-10-01 04:33:02.762379', 'step': 16307, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:02.793615', 'step': 16307, 'epoch': 3} {'type': 'loss', 'content': 0.07666629552841187, 'timestamp': '2025-10-01 04:33:02.817355', 'step': 16308, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:02.848501', 'step': 16308, 'epoch': 3} {'type': 'loss', 'content': 0.09498360008001328, 'timestamp': '2025-10-01 04:33:02.850634', 'step': 16309, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:02.880970', 'step': 16309, 'epoch': 3} {'type': 'loss', 'content': 0.09300349652767181, 'timestamp': '2025-10-01 04:33:02.883115', 'step': 16310, 'epoch': 3} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 949202279808}], 'timestamp': '2025-10-01 04:33:12.340072', 'step': 16310, 'epoch': 3} {'type': 'pplx', 'content': 10535.614196379795, 'timestamp': '2025-10-01 04:33:12.343365', 'step': 16310, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:33:12.374172', 'step': 16310, 'epoch': 3} {'type': 'loss', 'content': 0.08076555281877518, 'timestamp': '2025-10-01 04:33:12.378119', 'step': 16311, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:33:12.425565', 'step': 16311, 'epoch': 3} {'type': 'loss', 'content': 0.027991965413093567, 'timestamp': '2025-10-01 04:33:12.452843', 'step': 16312, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:12.491386', 'step': 16312, 'epoch': 3} {'type': 'loss', 'content': 0.0546964667737484, 'timestamp': '2025-10-01 04:33:12.493953', 'step': 16313, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:12.525345', 'step': 16313, 'epoch': 3} {'type': 'loss', 'content': 0.0809120386838913, 'timestamp': '2025-10-01 04:33:12.528184', 'step': 16314, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:12.559884', 'step': 16314, 'epoch': 3} {'type': 'loss', 'content': 0.05639483407139778, 'timestamp': '2025-10-01 04:33:12.562548', 'step': 16315, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:12.594268', 'step': 16315, 'epoch': 3} {'type': 'loss', 'content': 0.13448849320411682, 'timestamp': '2025-10-01 04:33:12.618346', 'step': 16316, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:12.650194', 'step': 16316, 'epoch': 3} {'type': 'loss', 'content': 0.08385910093784332, 'timestamp': '2025-10-01 04:33:12.657644', 'step': 16317, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:12.690803', 'step': 16317, 'epoch': 3} {'type': 'loss', 'content': 0.040148936212062836, 'timestamp': '2025-10-01 04:33:12.693085', 'step': 16318, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:12.725162', 'step': 16318, 'epoch': 3} {'type': 'loss', 'content': 0.04486420378088951, 'timestamp': '2025-10-01 04:33:12.727819', 'step': 16319, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:12.761054', 'step': 16319, 'epoch': 3} {'type': 'loss', 'content': 0.09275704622268677, 'timestamp': '2025-10-01 04:33:12.785092', 'step': 16320, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:33:12.818281', 'step': 16320, 'epoch': 3} {'type': 'loss', 'content': 0.059507180005311966, 'timestamp': '2025-10-01 04:33:12.821023', 'step': 16321, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:12.853546', 'step': 16321, 'epoch': 3} {'type': 'loss', 'content': 0.0643000528216362, 'timestamp': '2025-10-01 04:33:12.856009', 'step': 16322, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:33:12.889674', 'step': 16322, 'epoch': 3} {'type': 'loss', 'content': 0.03299105912446976, 'timestamp': '2025-10-01 04:33:12.892339', 'step': 16323, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:12.923773', 'step': 16323, 'epoch': 3} {'type': 'loss', 'content': 0.06661826372146606, 'timestamp': '2025-10-01 04:33:12.947648', 'step': 16324, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:12.979003', 'step': 16324, 'epoch': 3} {'type': 'loss', 'content': 0.05572788044810295, 'timestamp': '2025-10-01 04:33:12.984821', 'step': 16325, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:33:13.017687', 'step': 16325, 'epoch': 3} {'type': 'loss', 'content': 0.051691941916942596, 'timestamp': '2025-10-01 04:33:13.020719', 'step': 16326, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:33:13.051149', 'step': 16326, 'epoch': 3} {'type': 'loss', 'content': 0.0770532414317131, 'timestamp': '2025-10-01 04:33:13.053590', 'step': 16327, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:13.090806', 'step': 16327, 'epoch': 3} {'type': 'loss', 'content': 0.1264255940914154, 'timestamp': '2025-10-01 04:33:13.114640', 'step': 16328, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:13.145827', 'step': 16328, 'epoch': 3} {'type': 'loss', 'content': 0.04658382385969162, 'timestamp': '2025-10-01 04:33:13.148010', 'step': 16329, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:13.188046', 'step': 16329, 'epoch': 3} {'type': 'loss', 'content': 0.04756782948970795, 'timestamp': '2025-10-01 04:33:13.190638', 'step': 16330, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:13.221967', 'step': 16330, 'epoch': 3} {'type': 'loss', 'content': 0.034701693803071976, 'timestamp': '2025-10-01 04:33:13.224395', 'step': 16331, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:13.255778', 'step': 16331, 'epoch': 3} {'type': 'loss', 'content': 0.05077574774622917, 'timestamp': '2025-10-01 04:33:13.280028', 'step': 16332, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:13.310690', 'step': 16332, 'epoch': 3} {'type': 'loss', 'content': 0.1088317409157753, 'timestamp': '2025-10-01 04:33:13.313967', 'step': 16333, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:33:13.344599', 'step': 16333, 'epoch': 3} {'type': 'loss', 'content': 0.06861399114131927, 'timestamp': '2025-10-01 04:33:13.347063', 'step': 16334, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:13.381246', 'step': 16334, 'epoch': 3} {'type': 'loss', 'content': 0.09227131307125092, 'timestamp': '2025-10-01 04:33:13.384894', 'step': 16335, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:13.415682', 'step': 16335, 'epoch': 3} {'type': 'loss', 'content': 0.07067419588565826, 'timestamp': '2025-10-01 04:33:13.439762', 'step': 16336, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:13.470152', 'step': 16336, 'epoch': 3} {'type': 'loss', 'content': 0.15826007723808289, 'timestamp': '2025-10-01 04:33:13.479160', 'step': 16337, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:33:13.512031', 'step': 16337, 'epoch': 3} {'type': 'loss', 'content': 0.08486676961183548, 'timestamp': '2025-10-01 04:33:13.514881', 'step': 16338, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:13.550263', 'step': 16338, 'epoch': 3} {'type': 'loss', 'content': 0.10060470551252365, 'timestamp': '2025-10-01 04:33:13.552579', 'step': 16339, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:33:13.587544', 'step': 16339, 'epoch': 3} {'type': 'loss', 'content': 0.10150555521249771, 'timestamp': '2025-10-01 04:33:13.611346', 'step': 16340, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:13.642939', 'step': 16340, 'epoch': 3} {'type': 'loss', 'content': 0.024067558348178864, 'timestamp': '2025-10-01 04:33:13.645173', 'step': 16341, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:33:13.679432', 'step': 16341, 'epoch': 3} {'type': 'loss', 'content': 0.16582177579402924, 'timestamp': '2025-10-01 04:33:13.682348', 'step': 16342, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:13.713692', 'step': 16342, 'epoch': 3} {'type': 'loss', 'content': 0.036334723234176636, 'timestamp': '2025-10-01 04:33:13.716048', 'step': 16343, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:13.746069', 'step': 16343, 'epoch': 3} {'type': 'loss', 'content': 0.10853739082813263, 'timestamp': '2025-10-01 04:33:13.771120', 'step': 16344, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:13.810250', 'step': 16344, 'epoch': 3} {'type': 'loss', 'content': 0.09458290040493011, 'timestamp': '2025-10-01 04:33:13.812521', 'step': 16345, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:13.843373', 'step': 16345, 'epoch': 3} {'type': 'loss', 'content': 0.10069786757230759, 'timestamp': '2025-10-01 04:33:13.845569', 'step': 16346, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:33:13.876633', 'step': 16346, 'epoch': 3} {'type': 'loss', 'content': 0.036085985600948334, 'timestamp': '2025-10-01 04:33:13.879497', 'step': 16347, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:13.911289', 'step': 16347, 'epoch': 3} {'type': 'loss', 'content': 0.06619782000780106, 'timestamp': '2025-10-01 04:33:13.935027', 'step': 16348, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:13.968031', 'step': 16348, 'epoch': 3} {'type': 'loss', 'content': 0.015603636391460896, 'timestamp': '2025-10-01 04:33:13.970559', 'step': 16349, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:14.002046', 'step': 16349, 'epoch': 3} {'type': 'loss', 'content': 0.08023504912853241, 'timestamp': '2025-10-01 04:33:14.004238', 'step': 16350, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:14.039676', 'step': 16350, 'epoch': 3} {'type': 'loss', 'content': 0.07724957168102264, 'timestamp': '2025-10-01 04:33:14.041867', 'step': 16351, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:33:14.080496', 'step': 16351, 'epoch': 3} {'type': 'loss', 'content': 0.06807629764080048, 'timestamp': '2025-10-01 04:33:14.105040', 'step': 16352, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:14.137222', 'step': 16352, 'epoch': 3} {'type': 'loss', 'content': 0.05223909392952919, 'timestamp': '2025-10-01 04:33:14.139198', 'step': 16353, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:33:14.171341', 'step': 16353, 'epoch': 3} {'type': 'loss', 'content': 0.10600640624761581, 'timestamp': '2025-10-01 04:33:14.173715', 'step': 16354, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:14.206570', 'step': 16354, 'epoch': 3} {'type': 'loss', 'content': 0.04839111119508743, 'timestamp': '2025-10-01 04:33:14.208843', 'step': 16355, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:14.240200', 'step': 16355, 'epoch': 3} {'type': 'loss', 'content': 0.047103527933359146, 'timestamp': '2025-10-01 04:33:14.264561', 'step': 16356, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:14.295590', 'step': 16356, 'epoch': 3} {'type': 'loss', 'content': 0.052719444036483765, 'timestamp': '2025-10-01 04:33:14.302324', 'step': 16357, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:14.337767', 'step': 16357, 'epoch': 3} {'type': 'loss', 'content': 0.06299895793199539, 'timestamp': '2025-10-01 04:33:14.340063', 'step': 16358, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:33:14.371584', 'step': 16358, 'epoch': 3} {'type': 'loss', 'content': 0.06783583015203476, 'timestamp': '2025-10-01 04:33:14.375479', 'step': 16359, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:14.408539', 'step': 16359, 'epoch': 3} {'type': 'loss', 'content': 0.10431575030088425, 'timestamp': '2025-10-01 04:33:14.432292', 'step': 16360, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:14.463843', 'step': 16360, 'epoch': 3} {'type': 'loss', 'content': 0.04550408571958542, 'timestamp': '2025-10-01 04:33:14.466188', 'step': 16361, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:14.496453', 'step': 16361, 'epoch': 3} {'type': 'loss', 'content': 0.11996597796678543, 'timestamp': '2025-10-01 04:33:14.500448', 'step': 16362, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:14.532872', 'step': 16362, 'epoch': 3} {'type': 'loss', 'content': 0.06852823495864868, 'timestamp': '2025-10-01 04:33:14.538475', 'step': 16363, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:14.576236', 'step': 16363, 'epoch': 3} {'type': 'loss', 'content': 0.1440110206604004, 'timestamp': '2025-10-01 04:33:14.599975', 'step': 16364, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:14.630613', 'step': 16364, 'epoch': 3} {'type': 'loss', 'content': 0.18111461400985718, 'timestamp': '2025-10-01 04:33:14.632660', 'step': 16365, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:14.664365', 'step': 16365, 'epoch': 3} {'type': 'loss', 'content': 0.025502081960439682, 'timestamp': '2025-10-01 04:33:14.666620', 'step': 16366, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:14.697324', 'step': 16366, 'epoch': 3} {'type': 'loss', 'content': 0.09275099635124207, 'timestamp': '2025-10-01 04:33:14.699436', 'step': 16367, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:14.730286', 'step': 16367, 'epoch': 3} {'type': 'loss', 'content': 0.10143186151981354, 'timestamp': '2025-10-01 04:33:14.753965', 'step': 16368, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:14.784410', 'step': 16368, 'epoch': 3} {'type': 'loss', 'content': 0.039708152413368225, 'timestamp': '2025-10-01 04:33:14.786544', 'step': 16369, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:14.820319', 'step': 16369, 'epoch': 3} {'type': 'loss', 'content': 0.10054130852222443, 'timestamp': '2025-10-01 04:33:14.822996', 'step': 16370, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:14.856789', 'step': 16370, 'epoch': 3} {'type': 'loss', 'content': 0.15847359597682953, 'timestamp': '2025-10-01 04:33:14.859759', 'step': 16371, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:14.890374', 'step': 16371, 'epoch': 3} {'type': 'loss', 'content': 0.05952659621834755, 'timestamp': '2025-10-01 04:33:14.914005', 'step': 16372, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:14.947588', 'step': 16372, 'epoch': 3} {'type': 'loss', 'content': 0.07377868890762329, 'timestamp': '2025-10-01 04:33:14.949638', 'step': 16373, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:14.979679', 'step': 16373, 'epoch': 3} {'type': 'loss', 'content': 0.022080402821302414, 'timestamp': '2025-10-01 04:33:14.981904', 'step': 16374, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:15.012127', 'step': 16374, 'epoch': 3} {'type': 'loss', 'content': 0.1070004254579544, 'timestamp': '2025-10-01 04:33:15.014586', 'step': 16375, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:15.048191', 'step': 16375, 'epoch': 3} {'type': 'loss', 'content': 0.027408231049776077, 'timestamp': '2025-10-01 04:33:15.071757', 'step': 16376, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:15.102679', 'step': 16376, 'epoch': 3} {'type': 'loss', 'content': 0.008410096168518066, 'timestamp': '2025-10-01 04:33:15.104854', 'step': 16377, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:15.135089', 'step': 16377, 'epoch': 3} {'type': 'loss', 'content': 0.03216972202062607, 'timestamp': '2025-10-01 04:33:15.137384', 'step': 16378, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:15.167554', 'step': 16378, 'epoch': 3} {'type': 'loss', 'content': 0.09253444522619247, 'timestamp': '2025-10-01 04:33:15.169639', 'step': 16379, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:15.206292', 'step': 16379, 'epoch': 3} {'type': 'loss', 'content': 0.060535136610269547, 'timestamp': '2025-10-01 04:33:15.229985', 'step': 16380, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:15.260692', 'step': 16380, 'epoch': 3} {'type': 'loss', 'content': 0.04092561826109886, 'timestamp': '2025-10-01 04:33:15.262864', 'step': 16381, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:15.293222', 'step': 16381, 'epoch': 3} {'type': 'loss', 'content': 0.08546971529722214, 'timestamp': '2025-10-01 04:33:15.295312', 'step': 16382, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:15.325835', 'step': 16382, 'epoch': 3} {'type': 'loss', 'content': 0.11367291957139969, 'timestamp': '2025-10-01 04:33:15.327963', 'step': 16383, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:15.358597', 'step': 16383, 'epoch': 3} {'type': 'loss', 'content': 0.11349033564329147, 'timestamp': '2025-10-01 04:33:15.382528', 'step': 16384, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:15.413706', 'step': 16384, 'epoch': 3} {'type': 'loss', 'content': 0.062179043889045715, 'timestamp': '2025-10-01 04:33:15.415869', 'step': 16385, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:15.447291', 'step': 16385, 'epoch': 3} {'type': 'loss', 'content': 0.05686251446604729, 'timestamp': '2025-10-01 04:33:15.449442', 'step': 16386, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:15.480609', 'step': 16386, 'epoch': 3} {'type': 'loss', 'content': 0.08588146418333054, 'timestamp': '2025-10-01 04:33:15.490383', 'step': 16387, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:15.536236', 'step': 16387, 'epoch': 3} {'type': 'loss', 'content': 0.1287500262260437, 'timestamp': '2025-10-01 04:33:15.559799', 'step': 16388, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:15.590804', 'step': 16388, 'epoch': 3} {'type': 'loss', 'content': 0.13908414542675018, 'timestamp': '2025-10-01 04:33:15.592964', 'step': 16389, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:15.623544', 'step': 16389, 'epoch': 3} {'type': 'loss', 'content': 0.08997177332639694, 'timestamp': '2025-10-01 04:33:15.625590', 'step': 16390, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:15.656546', 'step': 16390, 'epoch': 3} {'type': 'loss', 'content': 0.04961201921105385, 'timestamp': '2025-10-01 04:33:15.658617', 'step': 16391, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:15.689148', 'step': 16391, 'epoch': 3} {'type': 'loss', 'content': 0.0501549132168293, 'timestamp': '2025-10-01 04:33:15.712941', 'step': 16392, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:15.744359', 'step': 16392, 'epoch': 3} {'type': 'loss', 'content': 0.04191837087273598, 'timestamp': '2025-10-01 04:33:15.746623', 'step': 16393, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:15.776898', 'step': 16393, 'epoch': 3} {'type': 'loss', 'content': 0.07711271941661835, 'timestamp': '2025-10-01 04:33:15.779123', 'step': 16394, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:15.809348', 'step': 16394, 'epoch': 3} {'type': 'loss', 'content': 0.05702155455946922, 'timestamp': '2025-10-01 04:33:15.820784', 'step': 16395, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:15.851686', 'step': 16395, 'epoch': 3} {'type': 'loss', 'content': 0.06035564839839935, 'timestamp': '2025-10-01 04:33:15.875450', 'step': 16396, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:15.906352', 'step': 16396, 'epoch': 3} {'type': 'loss', 'content': 0.09532254934310913, 'timestamp': '2025-10-01 04:33:15.908463', 'step': 16397, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:15.939425', 'step': 16397, 'epoch': 3} {'type': 'loss', 'content': 0.023405104875564575, 'timestamp': '2025-10-01 04:33:15.946320', 'step': 16398, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:15.977432', 'step': 16398, 'epoch': 3} {'type': 'loss', 'content': 0.07640933245420456, 'timestamp': '2025-10-01 04:33:15.979499', 'step': 16399, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:16.010370', 'step': 16399, 'epoch': 3} {'type': 'loss', 'content': 0.07292588800191879, 'timestamp': '2025-10-01 04:33:16.034150', 'step': 16400, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:16.065130', 'step': 16400, 'epoch': 3} {'type': 'loss', 'content': 0.054959170520305634, 'timestamp': '2025-10-01 04:33:16.067282', 'step': 16401, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:16.097865', 'step': 16401, 'epoch': 3} {'type': 'loss', 'content': 0.0670253112912178, 'timestamp': '2025-10-01 04:33:16.100016', 'step': 16402, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:16.131621', 'step': 16402, 'epoch': 3} {'type': 'loss', 'content': 0.05448634549975395, 'timestamp': '2025-10-01 04:33:16.133733', 'step': 16403, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:16.163896', 'step': 16403, 'epoch': 3} {'type': 'loss', 'content': 0.059959251433610916, 'timestamp': '2025-10-01 04:33:16.187568', 'step': 16404, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:16.217479', 'step': 16404, 'epoch': 3} {'type': 'loss', 'content': 0.06245869770646095, 'timestamp': '2025-10-01 04:33:16.219753', 'step': 16405, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:16.252395', 'step': 16405, 'epoch': 3} {'type': 'loss', 'content': 0.03814980015158653, 'timestamp': '2025-10-01 04:33:16.254657', 'step': 16406, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:16.286696', 'step': 16406, 'epoch': 3} {'type': 'loss', 'content': 0.08197826147079468, 'timestamp': '2025-10-01 04:33:16.288903', 'step': 16407, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:16.321536', 'step': 16407, 'epoch': 3} {'type': 'loss', 'content': 0.07671806961297989, 'timestamp': '2025-10-01 04:33:16.345218', 'step': 16408, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:33:16.376590', 'step': 16408, 'epoch': 3} {'type': 'loss', 'content': 0.08360139280557632, 'timestamp': '2025-10-01 04:33:16.380599', 'step': 16409, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:16.411798', 'step': 16409, 'epoch': 3} {'type': 'loss', 'content': 0.07163190096616745, 'timestamp': '2025-10-01 04:33:16.413981', 'step': 16410, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:16.447429', 'step': 16410, 'epoch': 3} {'type': 'loss', 'content': 0.0785379409790039, 'timestamp': '2025-10-01 04:33:16.449575', 'step': 16411, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:16.479918', 'step': 16411, 'epoch': 3} {'type': 'loss', 'content': 0.025157013908028603, 'timestamp': '2025-10-01 04:33:16.503541', 'step': 16412, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:16.534347', 'step': 16412, 'epoch': 3} {'type': 'loss', 'content': 0.0442158579826355, 'timestamp': '2025-10-01 04:33:16.536495', 'step': 16413, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:16.572368', 'step': 16413, 'epoch': 3} {'type': 'loss', 'content': 0.06499592959880829, 'timestamp': '2025-10-01 04:33:16.576365', 'step': 16414, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:16.610494', 'step': 16414, 'epoch': 3} {'type': 'loss', 'content': 0.05965767800807953, 'timestamp': '2025-10-01 04:33:16.612732', 'step': 16415, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:16.644789', 'step': 16415, 'epoch': 3} {'type': 'loss', 'content': 0.17801716923713684, 'timestamp': '2025-10-01 04:33:16.669118', 'step': 16416, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:16.700527', 'step': 16416, 'epoch': 3} {'type': 'loss', 'content': 0.06675585359334946, 'timestamp': '2025-10-01 04:33:16.702793', 'step': 16417, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:33:16.734688', 'step': 16417, 'epoch': 3} {'type': 'loss', 'content': 0.07510305196046829, 'timestamp': '2025-10-01 04:33:16.737091', 'step': 16418, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:33:16.767003', 'step': 16418, 'epoch': 3} {'type': 'loss', 'content': 0.015589235350489616, 'timestamp': '2025-10-01 04:33:16.769267', 'step': 16419, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:16.800904', 'step': 16419, 'epoch': 3} {'type': 'loss', 'content': 0.08242173492908478, 'timestamp': '2025-10-01 04:33:16.824711', 'step': 16420, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:16.855296', 'step': 16420, 'epoch': 3} {'type': 'loss', 'content': 0.12499110400676727, 'timestamp': '2025-10-01 04:33:16.857775', 'step': 16421, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:16.895229', 'step': 16421, 'epoch': 3} {'type': 'loss', 'content': 0.11702378839254379, 'timestamp': '2025-10-01 04:33:16.897623', 'step': 16422, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:16.936850', 'step': 16422, 'epoch': 3} {'type': 'loss', 'content': 0.08490326255559921, 'timestamp': '2025-10-01 04:33:16.939092', 'step': 16423, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:16.971297', 'step': 16423, 'epoch': 3} {'type': 'loss', 'content': 0.08135605603456497, 'timestamp': '2025-10-01 04:33:16.996633', 'step': 16424, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:17.041594', 'step': 16424, 'epoch': 3} {'type': 'loss', 'content': 0.09527381509542465, 'timestamp': '2025-10-01 04:33:17.043722', 'step': 16425, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:17.074834', 'step': 16425, 'epoch': 3} {'type': 'loss', 'content': 0.08248397707939148, 'timestamp': '2025-10-01 04:33:17.077056', 'step': 16426, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:17.108627', 'step': 16426, 'epoch': 3} {'type': 'loss', 'content': 0.011197610758244991, 'timestamp': '2025-10-01 04:33:17.111341', 'step': 16427, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:17.143711', 'step': 16427, 'epoch': 3} {'type': 'loss', 'content': 0.030404502525925636, 'timestamp': '2025-10-01 04:33:17.167420', 'step': 16428, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:17.199381', 'step': 16428, 'epoch': 3} {'type': 'loss', 'content': 0.10359644889831543, 'timestamp': '2025-10-01 04:33:17.201628', 'step': 16429, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:17.232546', 'step': 16429, 'epoch': 3} {'type': 'loss', 'content': 0.055697083473205566, 'timestamp': '2025-10-01 04:33:17.234865', 'step': 16430, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:33:17.266734', 'step': 16430, 'epoch': 3} {'type': 'loss', 'content': 0.06391467899084091, 'timestamp': '2025-10-01 04:33:17.271070', 'step': 16431, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:17.305578', 'step': 16431, 'epoch': 3} {'type': 'loss', 'content': 0.07687301933765411, 'timestamp': '2025-10-01 04:33:17.330835', 'step': 16432, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:17.367212', 'step': 16432, 'epoch': 3} {'type': 'loss', 'content': 0.06335485726594925, 'timestamp': '2025-10-01 04:33:17.369345', 'step': 16433, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:17.401452', 'step': 16433, 'epoch': 3} {'type': 'loss', 'content': 0.13660407066345215, 'timestamp': '2025-10-01 04:33:17.405950', 'step': 16434, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:33:17.436662', 'step': 16434, 'epoch': 3} {'type': 'loss', 'content': 0.045513954013586044, 'timestamp': '2025-10-01 04:33:17.439035', 'step': 16435, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:17.474585', 'step': 16435, 'epoch': 3} {'type': 'loss', 'content': 0.07918666303157806, 'timestamp': '2025-10-01 04:33:17.498362', 'step': 16436, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:17.529913', 'step': 16436, 'epoch': 3} {'type': 'loss', 'content': 0.04469789192080498, 'timestamp': '2025-10-01 04:33:17.532583', 'step': 16437, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:17.563593', 'step': 16437, 'epoch': 3} {'type': 'loss', 'content': 0.05317794531583786, 'timestamp': '2025-10-01 04:33:17.565831', 'step': 16438, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:33:17.597888', 'step': 16438, 'epoch': 3} {'type': 'loss', 'content': 0.10343809425830841, 'timestamp': '2025-10-01 04:33:17.600135', 'step': 16439, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:17.631974', 'step': 16439, 'epoch': 3} {'type': 'loss', 'content': 0.032663892954587936, 'timestamp': '2025-10-01 04:33:17.655735', 'step': 16440, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:17.686978', 'step': 16440, 'epoch': 3} {'type': 'loss', 'content': 0.009605488739907742, 'timestamp': '2025-10-01 04:33:17.689432', 'step': 16441, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:17.720226', 'step': 16441, 'epoch': 3} {'type': 'loss', 'content': 0.037811510264873505, 'timestamp': '2025-10-01 04:33:17.722485', 'step': 16442, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:33:17.753359', 'step': 16442, 'epoch': 3} {'type': 'loss', 'content': 0.0447227917611599, 'timestamp': '2025-10-01 04:33:17.756125', 'step': 16443, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:33:17.786592', 'step': 16443, 'epoch': 3} {'type': 'loss', 'content': 0.05345043167471886, 'timestamp': '2025-10-01 04:33:17.810575', 'step': 16444, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:17.841734', 'step': 16444, 'epoch': 3} {'type': 'loss', 'content': 0.11860397458076477, 'timestamp': '2025-10-01 04:33:17.843900', 'step': 16445, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:17.874879', 'step': 16445, 'epoch': 3} {'type': 'loss', 'content': 0.08292635530233383, 'timestamp': '2025-10-01 04:33:17.877478', 'step': 16446, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:17.908041', 'step': 16446, 'epoch': 3} {'type': 'loss', 'content': 0.0699189305305481, 'timestamp': '2025-10-01 04:33:17.910397', 'step': 16447, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:33:17.940676', 'step': 16447, 'epoch': 3} {'type': 'loss', 'content': 0.0782109722495079, 'timestamp': '2025-10-01 04:33:17.964450', 'step': 16448, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:17.994706', 'step': 16448, 'epoch': 3} {'type': 'loss', 'content': 0.09309939295053482, 'timestamp': '2025-10-01 04:33:17.997112', 'step': 16449, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:18.027612', 'step': 16449, 'epoch': 3} {'type': 'loss', 'content': 0.023432672023773193, 'timestamp': '2025-10-01 04:33:18.030128', 'step': 16450, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:18.060422', 'step': 16450, 'epoch': 3} {'type': 'loss', 'content': 0.05155784264206886, 'timestamp': '2025-10-01 04:33:18.062694', 'step': 16451, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:18.092716', 'step': 16451, 'epoch': 3} {'type': 'loss', 'content': 0.03749534487724304, 'timestamp': '2025-10-01 04:33:18.116337', 'step': 16452, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:18.147015', 'step': 16452, 'epoch': 3} {'type': 'loss', 'content': 0.03711869195103645, 'timestamp': '2025-10-01 04:33:18.149093', 'step': 16453, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:18.178992', 'step': 16453, 'epoch': 3} {'type': 'loss', 'content': 0.04019100219011307, 'timestamp': '2025-10-01 04:33:18.181114', 'step': 16454, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:18.210775', 'step': 16454, 'epoch': 3} {'type': 'loss', 'content': 0.08969990909099579, 'timestamp': '2025-10-01 04:33:18.213178', 'step': 16455, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:33:18.244399', 'step': 16455, 'epoch': 3} {'type': 'loss', 'content': 0.028008459135890007, 'timestamp': '2025-10-01 04:33:18.268130', 'step': 16456, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:18.298921', 'step': 16456, 'epoch': 3} {'type': 'loss', 'content': 0.018726173788309097, 'timestamp': '2025-10-01 04:33:18.301183', 'step': 16457, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:18.331749', 'step': 16457, 'epoch': 3} {'type': 'loss', 'content': 0.09615927934646606, 'timestamp': '2025-10-01 04:33:18.334071', 'step': 16458, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:18.366193', 'step': 16458, 'epoch': 3} {'type': 'loss', 'content': 0.04233758524060249, 'timestamp': '2025-10-01 04:33:18.368314', 'step': 16459, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:18.398456', 'step': 16459, 'epoch': 3} {'type': 'loss', 'content': 0.04605892673134804, 'timestamp': '2025-10-01 04:33:18.422170', 'step': 16460, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:18.453245', 'step': 16460, 'epoch': 3} {'type': 'loss', 'content': 0.11434932053089142, 'timestamp': '2025-10-01 04:33:18.455438', 'step': 16461, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:18.486258', 'step': 16461, 'epoch': 3} {'type': 'loss', 'content': 0.0654824748635292, 'timestamp': '2025-10-01 04:33:18.488363', 'step': 16462, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:18.518694', 'step': 16462, 'epoch': 3} {'type': 'loss', 'content': 0.030700433999300003, 'timestamp': '2025-10-01 04:33:18.520896', 'step': 16463, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:18.553150', 'step': 16463, 'epoch': 3} {'type': 'loss', 'content': 0.05662636458873749, 'timestamp': '2025-10-01 04:33:18.577006', 'step': 16464, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:18.608419', 'step': 16464, 'epoch': 3} {'type': 'loss', 'content': 0.08672847598791122, 'timestamp': '2025-10-01 04:33:18.610726', 'step': 16465, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:18.640388', 'step': 16465, 'epoch': 3} {'type': 'loss', 'content': 0.028280731290578842, 'timestamp': '2025-10-01 04:33:18.642685', 'step': 16466, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:18.673284', 'step': 16466, 'epoch': 3} {'type': 'loss', 'content': 0.054541219025850296, 'timestamp': '2025-10-01 04:33:18.675436', 'step': 16467, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:18.705786', 'step': 16467, 'epoch': 3} {'type': 'loss', 'content': 0.04357906058430672, 'timestamp': '2025-10-01 04:33:18.729420', 'step': 16468, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:18.760905', 'step': 16468, 'epoch': 3} {'type': 'loss', 'content': 0.07270365208387375, 'timestamp': '2025-10-01 04:33:18.763693', 'step': 16469, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:18.793811', 'step': 16469, 'epoch': 3} {'type': 'loss', 'content': 0.05838248133659363, 'timestamp': '2025-10-01 04:33:18.795980', 'step': 16470, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:33:18.827069', 'step': 16470, 'epoch': 3} {'type': 'loss', 'content': 0.05807381868362427, 'timestamp': '2025-10-01 04:33:18.829794', 'step': 16471, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:18.867011', 'step': 16471, 'epoch': 3} {'type': 'loss', 'content': 0.017065048217773438, 'timestamp': '2025-10-01 04:33:18.890541', 'step': 16472, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:18.921134', 'step': 16472, 'epoch': 3} {'type': 'loss', 'content': 0.07761678099632263, 'timestamp': '2025-10-01 04:33:18.923513', 'step': 16473, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:18.955107', 'step': 16473, 'epoch': 3} {'type': 'loss', 'content': 0.05018502473831177, 'timestamp': '2025-10-01 04:33:18.957322', 'step': 16474, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:18.988497', 'step': 16474, 'epoch': 3} {'type': 'loss', 'content': 0.11057315766811371, 'timestamp': '2025-10-01 04:33:18.990968', 'step': 16475, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:19.021503', 'step': 16475, 'epoch': 3} {'type': 'loss', 'content': 0.12837624549865723, 'timestamp': '2025-10-01 04:33:19.045159', 'step': 16476, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:19.077443', 'step': 16476, 'epoch': 3} {'type': 'loss', 'content': 0.06345750391483307, 'timestamp': '2025-10-01 04:33:19.079809', 'step': 16477, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:19.109933', 'step': 16477, 'epoch': 3} {'type': 'loss', 'content': 0.03277893736958504, 'timestamp': '2025-10-01 04:33:19.112273', 'step': 16478, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:33:19.143148', 'step': 16478, 'epoch': 3} {'type': 'loss', 'content': 0.037313949316740036, 'timestamp': '2025-10-01 04:33:19.145792', 'step': 16479, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:19.176459', 'step': 16479, 'epoch': 3} {'type': 'loss', 'content': 0.0817873477935791, 'timestamp': '2025-10-01 04:33:19.200251', 'step': 16480, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:19.232139', 'step': 16480, 'epoch': 3} {'type': 'loss', 'content': 0.04961074888706207, 'timestamp': '2025-10-01 04:33:19.234313', 'step': 16481, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:19.265170', 'step': 16481, 'epoch': 3} {'type': 'loss', 'content': 0.008109179325401783, 'timestamp': '2025-10-01 04:33:19.267337', 'step': 16482, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:33:19.297883', 'step': 16482, 'epoch': 3} {'type': 'loss', 'content': 0.07380206137895584, 'timestamp': '2025-10-01 04:33:19.300308', 'step': 16483, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:19.330545', 'step': 16483, 'epoch': 3} {'type': 'loss', 'content': 0.04968208074569702, 'timestamp': '2025-10-01 04:33:19.354095', 'step': 16484, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:19.384351', 'step': 16484, 'epoch': 3} {'type': 'loss', 'content': 0.0601716972887516, 'timestamp': '2025-10-01 04:33:19.386450', 'step': 16485, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:19.417457', 'step': 16485, 'epoch': 3} {'type': 'loss', 'content': 0.08433172106742859, 'timestamp': '2025-10-01 04:33:19.419642', 'step': 16486, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:19.454071', 'step': 16486, 'epoch': 3} {'type': 'loss', 'content': 0.04077095910906792, 'timestamp': '2025-10-01 04:33:19.456130', 'step': 16487, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:19.486816', 'step': 16487, 'epoch': 3} {'type': 'loss', 'content': 0.05106591433286667, 'timestamp': '2025-10-01 04:33:19.511043', 'step': 16488, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:19.542096', 'step': 16488, 'epoch': 3} {'type': 'loss', 'content': 0.08131963759660721, 'timestamp': '2025-10-01 04:33:19.546882', 'step': 16489, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:19.582266', 'step': 16489, 'epoch': 3} {'type': 'loss', 'content': 0.11879628151655197, 'timestamp': '2025-10-01 04:33:19.584231', 'step': 16490, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:19.615369', 'step': 16490, 'epoch': 3} {'type': 'loss', 'content': 0.04771244153380394, 'timestamp': '2025-10-01 04:33:19.617624', 'step': 16491, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:19.655300', 'step': 16491, 'epoch': 3} {'type': 'loss', 'content': 0.06885669380426407, 'timestamp': '2025-10-01 04:33:19.679007', 'step': 16492, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:33:19.709977', 'step': 16492, 'epoch': 3} {'type': 'loss', 'content': 0.02662736363708973, 'timestamp': '2025-10-01 04:33:19.712814', 'step': 16493, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:33:19.744166', 'step': 16493, 'epoch': 3} {'type': 'loss', 'content': 0.059638895094394684, 'timestamp': '2025-10-01 04:33:19.750328', 'step': 16494, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:19.781323', 'step': 16494, 'epoch': 3} {'type': 'loss', 'content': 0.08439627289772034, 'timestamp': '2025-10-01 04:33:19.783647', 'step': 16495, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:19.815174', 'step': 16495, 'epoch': 3} {'type': 'loss', 'content': 0.08816789835691452, 'timestamp': '2025-10-01 04:33:19.838950', 'step': 16496, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:19.869758', 'step': 16496, 'epoch': 3} {'type': 'loss', 'content': 0.07137718796730042, 'timestamp': '2025-10-01 04:33:19.871909', 'step': 16497, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:19.903269', 'step': 16497, 'epoch': 3} {'type': 'loss', 'content': 0.09926566481590271, 'timestamp': '2025-10-01 04:33:19.905764', 'step': 16498, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:19.937559', 'step': 16498, 'epoch': 3} {'type': 'loss', 'content': 0.027046646922826767, 'timestamp': '2025-10-01 04:33:19.939703', 'step': 16499, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:19.969948', 'step': 16499, 'epoch': 3} {'type': 'loss', 'content': 0.036985184997320175, 'timestamp': '2025-10-01 04:33:19.993630', 'step': 16500, 'epoch': 3} {'type': 'info', 'content': 'Checkpoint saved at step 16500', 'timestamp': '2025-10-01 04:33:24.812814', 'step': 16500, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:33:24.854182', 'step': 16500, 'epoch': 3} {'type': 'loss', 'content': 0.052597809582948685, 'timestamp': '2025-10-01 04:33:24.856412', 'step': 16501, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:24.886749', 'step': 16501, 'epoch': 3} {'type': 'loss', 'content': 0.1316547691822052, 'timestamp': '2025-10-01 04:33:24.888969', 'step': 16502, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:24.919948', 'step': 16502, 'epoch': 3} {'type': 'loss', 'content': 0.07526081800460815, 'timestamp': '2025-10-01 04:33:24.922339', 'step': 16503, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:24.952714', 'step': 16503, 'epoch': 3} {'type': 'loss', 'content': 0.02809571847319603, 'timestamp': '2025-10-01 04:33:24.976529', 'step': 16504, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:25.009615', 'step': 16504, 'epoch': 3} {'type': 'loss', 'content': 0.03857124224305153, 'timestamp': '2025-10-01 04:33:25.012502', 'step': 16505, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:25.045589', 'step': 16505, 'epoch': 3} {'type': 'loss', 'content': 0.08185082674026489, 'timestamp': '2025-10-01 04:33:25.047753', 'step': 16506, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:25.079032', 'step': 16506, 'epoch': 3} {'type': 'loss', 'content': 0.09938468039035797, 'timestamp': '2025-10-01 04:33:25.081474', 'step': 16507, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:25.112929', 'step': 16507, 'epoch': 3} {'type': 'loss', 'content': 0.027664830908179283, 'timestamp': '2025-10-01 04:33:25.136664', 'step': 16508, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:25.167427', 'step': 16508, 'epoch': 3} {'type': 'loss', 'content': 0.032459940761327744, 'timestamp': '2025-10-01 04:33:25.169591', 'step': 16509, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:25.201096', 'step': 16509, 'epoch': 3} {'type': 'loss', 'content': 0.08232386410236359, 'timestamp': '2025-10-01 04:33:25.203395', 'step': 16510, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:25.234964', 'step': 16510, 'epoch': 3} {'type': 'loss', 'content': 0.07400164753198624, 'timestamp': '2025-10-01 04:33:25.237516', 'step': 16511, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:25.267603', 'step': 16511, 'epoch': 3} {'type': 'loss', 'content': 0.08978932350873947, 'timestamp': '2025-10-01 04:33:25.291354', 'step': 16512, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:25.322745', 'step': 16512, 'epoch': 3} {'type': 'loss', 'content': 0.07978001981973648, 'timestamp': '2025-10-01 04:33:25.324934', 'step': 16513, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:25.360935', 'step': 16513, 'epoch': 3} {'type': 'loss', 'content': 0.05382758378982544, 'timestamp': '2025-10-01 04:33:25.363543', 'step': 16514, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:25.393783', 'step': 16514, 'epoch': 3} {'type': 'loss', 'content': 0.058371350169181824, 'timestamp': '2025-10-01 04:33:25.395941', 'step': 16515, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:25.426221', 'step': 16515, 'epoch': 3} {'type': 'loss', 'content': 0.05971159413456917, 'timestamp': '2025-10-01 04:33:25.449904', 'step': 16516, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:25.480028', 'step': 16516, 'epoch': 3} {'type': 'loss', 'content': 0.0921982005238533, 'timestamp': '2025-10-01 04:33:25.482063', 'step': 16517, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:25.513234', 'step': 16517, 'epoch': 3} {'type': 'loss', 'content': 0.028823861852288246, 'timestamp': '2025-10-01 04:33:25.515350', 'step': 16518, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:25.545824', 'step': 16518, 'epoch': 3} {'type': 'loss', 'content': 0.059919700026512146, 'timestamp': '2025-10-01 04:33:25.548055', 'step': 16519, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:25.579235', 'step': 16519, 'epoch': 3} {'type': 'loss', 'content': 0.10795724391937256, 'timestamp': '2025-10-01 04:33:25.602899', 'step': 16520, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:25.634191', 'step': 16520, 'epoch': 3} {'type': 'loss', 'content': 0.05122489109635353, 'timestamp': '2025-10-01 04:33:25.647055', 'step': 16521, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:25.677592', 'step': 16521, 'epoch': 3} {'type': 'loss', 'content': 0.10499558597803116, 'timestamp': '2025-10-01 04:33:25.680153', 'step': 16522, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:33:25.713408', 'step': 16522, 'epoch': 3} {'type': 'loss', 'content': 0.05966506153345108, 'timestamp': '2025-10-01 04:33:25.717630', 'step': 16523, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:25.749308', 'step': 16523, 'epoch': 3} {'type': 'loss', 'content': 0.09074069559574127, 'timestamp': '2025-10-01 04:33:25.773003', 'step': 16524, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:25.812966', 'step': 16524, 'epoch': 3} {'type': 'loss', 'content': 0.011563414707779884, 'timestamp': '2025-10-01 04:33:25.815094', 'step': 16525, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:25.845508', 'step': 16525, 'epoch': 3} {'type': 'loss', 'content': 0.022013435140252113, 'timestamp': '2025-10-01 04:33:25.847774', 'step': 16526, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:33:25.888370', 'step': 16526, 'epoch': 3} {'type': 'loss', 'content': 0.02349216490983963, 'timestamp': '2025-10-01 04:33:25.890603', 'step': 16527, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:25.921641', 'step': 16527, 'epoch': 3} {'type': 'loss', 'content': 0.06078816205263138, 'timestamp': '2025-10-01 04:33:25.945089', 'step': 16528, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:25.984231', 'step': 16528, 'epoch': 3} {'type': 'loss', 'content': 0.04043630510568619, 'timestamp': '2025-10-01 04:33:25.986451', 'step': 16529, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:26.018154', 'step': 16529, 'epoch': 3} {'type': 'loss', 'content': 0.049514539539813995, 'timestamp': '2025-10-01 04:33:26.020270', 'step': 16530, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:26.051244', 'step': 16530, 'epoch': 3} {'type': 'loss', 'content': 0.09295651316642761, 'timestamp': '2025-10-01 04:33:26.053357', 'step': 16531, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:26.085574', 'step': 16531, 'epoch': 3} {'type': 'loss', 'content': 0.06611216813325882, 'timestamp': '2025-10-01 04:33:26.109274', 'step': 16532, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:26.141256', 'step': 16532, 'epoch': 3} {'type': 'loss', 'content': 0.07166916131973267, 'timestamp': '2025-10-01 04:33:26.143361', 'step': 16533, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:26.174966', 'step': 16533, 'epoch': 3} {'type': 'loss', 'content': 0.04434346780180931, 'timestamp': '2025-10-01 04:33:26.177170', 'step': 16534, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:26.207562', 'step': 16534, 'epoch': 3} {'type': 'loss', 'content': 0.19897575676441193, 'timestamp': '2025-10-01 04:33:26.210051', 'step': 16535, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:26.241899', 'step': 16535, 'epoch': 3} {'type': 'loss', 'content': 0.07289952039718628, 'timestamp': '2025-10-01 04:33:26.265678', 'step': 16536, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:26.297088', 'step': 16536, 'epoch': 3} {'type': 'loss', 'content': 0.06907918304204941, 'timestamp': '2025-10-01 04:33:26.300437', 'step': 16537, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:26.330805', 'step': 16537, 'epoch': 3} {'type': 'loss', 'content': 0.06323538720607758, 'timestamp': '2025-10-01 04:33:26.333039', 'step': 16538, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:26.363191', 'step': 16538, 'epoch': 3} {'type': 'loss', 'content': 0.0414721742272377, 'timestamp': '2025-10-01 04:33:26.365482', 'step': 16539, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:26.395855', 'step': 16539, 'epoch': 3} {'type': 'loss', 'content': 0.06539646536111832, 'timestamp': '2025-10-01 04:33:26.419987', 'step': 16540, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:26.450608', 'step': 16540, 'epoch': 3} {'type': 'loss', 'content': 0.10831435024738312, 'timestamp': '2025-10-01 04:33:26.452706', 'step': 16541, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:26.484329', 'step': 16541, 'epoch': 3} {'type': 'loss', 'content': 0.06719562411308289, 'timestamp': '2025-10-01 04:33:26.486458', 'step': 16542, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:33:26.517086', 'step': 16542, 'epoch': 3} {'type': 'loss', 'content': 0.11360087245702744, 'timestamp': '2025-10-01 04:33:26.519821', 'step': 16543, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:26.550986', 'step': 16543, 'epoch': 3} {'type': 'loss', 'content': 0.07645329087972641, 'timestamp': '2025-10-01 04:33:26.574567', 'step': 16544, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:26.605923', 'step': 16544, 'epoch': 3} {'type': 'loss', 'content': 0.05883924290537834, 'timestamp': '2025-10-01 04:33:26.608399', 'step': 16545, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:26.640232', 'step': 16545, 'epoch': 3} {'type': 'loss', 'content': 0.06407992541790009, 'timestamp': '2025-10-01 04:33:26.642395', 'step': 16546, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:33:26.676201', 'step': 16546, 'epoch': 3} {'type': 'loss', 'content': 0.05827992036938667, 'timestamp': '2025-10-01 04:33:26.678545', 'step': 16547, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:26.709848', 'step': 16547, 'epoch': 3} {'type': 'loss', 'content': 0.03173369541764259, 'timestamp': '2025-10-01 04:33:26.733736', 'step': 16548, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:26.765703', 'step': 16548, 'epoch': 3} {'type': 'loss', 'content': 0.06491362303495407, 'timestamp': '2025-10-01 04:33:26.768038', 'step': 16549, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:33:26.799215', 'step': 16549, 'epoch': 3} {'type': 'loss', 'content': 0.11842560023069382, 'timestamp': '2025-10-01 04:33:26.801652', 'step': 16550, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:26.832677', 'step': 16550, 'epoch': 3} {'type': 'loss', 'content': 0.030531272292137146, 'timestamp': '2025-10-01 04:33:26.835034', 'step': 16551, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:33:26.868002', 'step': 16551, 'epoch': 3} {'type': 'loss', 'content': 0.056487955152988434, 'timestamp': '2025-10-01 04:33:26.891598', 'step': 16552, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:33:26.922877', 'step': 16552, 'epoch': 3} {'type': 'loss', 'content': 0.05854085087776184, 'timestamp': '2025-10-01 04:33:26.933342', 'step': 16553, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:26.965233', 'step': 16553, 'epoch': 3} {'type': 'loss', 'content': 0.0998968556523323, 'timestamp': '2025-10-01 04:33:26.967457', 'step': 16554, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:26.997975', 'step': 16554, 'epoch': 3} {'type': 'loss', 'content': 0.04843159392476082, 'timestamp': '2025-10-01 04:33:27.000179', 'step': 16555, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:27.032680', 'step': 16555, 'epoch': 3} {'type': 'loss', 'content': 0.0499773807823658, 'timestamp': '2025-10-01 04:33:27.056377', 'step': 16556, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:27.088142', 'step': 16556, 'epoch': 3} {'type': 'loss', 'content': 0.15377341210842133, 'timestamp': '2025-10-01 04:33:27.090281', 'step': 16557, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:27.121336', 'step': 16557, 'epoch': 3} {'type': 'loss', 'content': 0.07405424863100052, 'timestamp': '2025-10-01 04:33:27.123441', 'step': 16558, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:27.155300', 'step': 16558, 'epoch': 3} {'type': 'loss', 'content': 0.09391161799430847, 'timestamp': '2025-10-01 04:33:27.157573', 'step': 16559, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:27.188804', 'step': 16559, 'epoch': 3} {'type': 'loss', 'content': 0.02633914351463318, 'timestamp': '2025-10-01 04:33:27.212548', 'step': 16560, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:27.245067', 'step': 16560, 'epoch': 3} {'type': 'loss', 'content': 0.11389073729515076, 'timestamp': '2025-10-01 04:33:27.247111', 'step': 16561, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:27.278684', 'step': 16561, 'epoch': 3} {'type': 'loss', 'content': 0.06829875707626343, 'timestamp': '2025-10-01 04:33:27.280864', 'step': 16562, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:27.311132', 'step': 16562, 'epoch': 3} {'type': 'loss', 'content': 0.05355541408061981, 'timestamp': '2025-10-01 04:33:27.313255', 'step': 16563, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:27.343546', 'step': 16563, 'epoch': 3} {'type': 'loss', 'content': 0.11742943525314331, 'timestamp': '2025-10-01 04:33:27.367327', 'step': 16564, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:27.397383', 'step': 16564, 'epoch': 3} {'type': 'loss', 'content': 0.01570761948823929, 'timestamp': '2025-10-01 04:33:27.399600', 'step': 16565, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:27.430941', 'step': 16565, 'epoch': 3} {'type': 'loss', 'content': 0.03549882397055626, 'timestamp': '2025-10-01 04:33:27.433275', 'step': 16566, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:27.463221', 'step': 16566, 'epoch': 3} {'type': 'loss', 'content': 0.07688915729522705, 'timestamp': '2025-10-01 04:33:27.465361', 'step': 16567, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:33:27.495947', 'step': 16567, 'epoch': 3} {'type': 'loss', 'content': 0.05582522973418236, 'timestamp': '2025-10-01 04:33:27.519798', 'step': 16568, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:27.550476', 'step': 16568, 'epoch': 3} {'type': 'loss', 'content': 0.07349010556936264, 'timestamp': '2025-10-01 04:33:27.553088', 'step': 16569, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:27.583269', 'step': 16569, 'epoch': 3} {'type': 'loss', 'content': 0.11591874063014984, 'timestamp': '2025-10-01 04:33:27.585434', 'step': 16570, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:27.617321', 'step': 16570, 'epoch': 3} {'type': 'loss', 'content': 0.05227915197610855, 'timestamp': '2025-10-01 04:33:27.619428', 'step': 16571, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:27.650324', 'step': 16571, 'epoch': 3} {'type': 'loss', 'content': 0.03280195966362953, 'timestamp': '2025-10-01 04:33:27.674108', 'step': 16572, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:27.706516', 'step': 16572, 'epoch': 3} {'type': 'loss', 'content': 0.11538510769605637, 'timestamp': '2025-10-01 04:33:27.708557', 'step': 16573, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:27.738903', 'step': 16573, 'epoch': 3} {'type': 'loss', 'content': 0.024583691731095314, 'timestamp': '2025-10-01 04:33:27.741099', 'step': 16574, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:27.771502', 'step': 16574, 'epoch': 3} {'type': 'loss', 'content': 0.11291083693504333, 'timestamp': '2025-10-01 04:33:27.773566', 'step': 16575, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:27.803798', 'step': 16575, 'epoch': 3} {'type': 'loss', 'content': 0.061587199568748474, 'timestamp': '2025-10-01 04:33:27.841437', 'step': 16576, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:27.871776', 'step': 16576, 'epoch': 3} {'type': 'loss', 'content': 0.041111405938863754, 'timestamp': '2025-10-01 04:33:27.874218', 'step': 16577, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:27.904931', 'step': 16577, 'epoch': 3} {'type': 'loss', 'content': 0.05488528311252594, 'timestamp': '2025-10-01 04:33:27.910250', 'step': 16578, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:27.942209', 'step': 16578, 'epoch': 3} {'type': 'loss', 'content': 0.1064123585820198, 'timestamp': '2025-10-01 04:33:27.944437', 'step': 16579, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:27.975118', 'step': 16579, 'epoch': 3} {'type': 'loss', 'content': 0.1176084578037262, 'timestamp': '2025-10-01 04:33:27.999120', 'step': 16580, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:33:28.029683', 'step': 16580, 'epoch': 3} {'type': 'loss', 'content': 0.030652984976768494, 'timestamp': '2025-10-01 04:33:28.031813', 'step': 16581, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:33:28.062450', 'step': 16581, 'epoch': 3} {'type': 'loss', 'content': 0.05193464457988739, 'timestamp': '2025-10-01 04:33:28.064825', 'step': 16582, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:28.096533', 'step': 16582, 'epoch': 3} {'type': 'loss', 'content': 0.03879158943891525, 'timestamp': '2025-10-01 04:33:28.098758', 'step': 16583, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:33:28.129097', 'step': 16583, 'epoch': 3} {'type': 'loss', 'content': 0.046970173716545105, 'timestamp': '2025-10-01 04:33:28.152992', 'step': 16584, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:28.183268', 'step': 16584, 'epoch': 3} {'type': 'loss', 'content': 0.08375436067581177, 'timestamp': '2025-10-01 04:33:28.185529', 'step': 16585, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:28.215698', 'step': 16585, 'epoch': 3} {'type': 'loss', 'content': 0.02706773206591606, 'timestamp': '2025-10-01 04:33:28.218330', 'step': 16586, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:28.250617', 'step': 16586, 'epoch': 3} {'type': 'loss', 'content': 0.07771732658147812, 'timestamp': '2025-10-01 04:33:28.252909', 'step': 16587, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:28.283421', 'step': 16587, 'epoch': 3} {'type': 'loss', 'content': 0.09847520291805267, 'timestamp': '2025-10-01 04:33:28.307346', 'step': 16588, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:28.337685', 'step': 16588, 'epoch': 3} {'type': 'loss', 'content': 0.015983810648322105, 'timestamp': '2025-10-01 04:33:28.339881', 'step': 16589, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:28.370391', 'step': 16589, 'epoch': 3} {'type': 'loss', 'content': 0.07456819713115692, 'timestamp': '2025-10-01 04:33:28.372577', 'step': 16590, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:28.403462', 'step': 16590, 'epoch': 3} {'type': 'loss', 'content': 0.08657212555408478, 'timestamp': '2025-10-01 04:33:28.405743', 'step': 16591, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:33:28.435922', 'step': 16591, 'epoch': 3} {'type': 'loss', 'content': 0.05654982104897499, 'timestamp': '2025-10-01 04:33:28.459684', 'step': 16592, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:33:28.490453', 'step': 16592, 'epoch': 3} {'type': 'loss', 'content': 0.08952772617340088, 'timestamp': '2025-10-01 04:33:28.492963', 'step': 16593, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:28.524111', 'step': 16593, 'epoch': 3} {'type': 'loss', 'content': 0.06769460439682007, 'timestamp': '2025-10-01 04:33:28.526353', 'step': 16594, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:28.557069', 'step': 16594, 'epoch': 3} {'type': 'loss', 'content': 0.0983651652932167, 'timestamp': '2025-10-01 04:33:28.559346', 'step': 16595, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:28.589631', 'step': 16595, 'epoch': 3} {'type': 'loss', 'content': 0.07685618847608566, 'timestamp': '2025-10-01 04:33:28.613362', 'step': 16596, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:28.643578', 'step': 16596, 'epoch': 3} {'type': 'loss', 'content': 0.023817764595150948, 'timestamp': '2025-10-01 04:33:28.646581', 'step': 16597, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:28.691368', 'step': 16597, 'epoch': 3} {'type': 'loss', 'content': 0.08958867937326431, 'timestamp': '2025-10-01 04:33:28.693757', 'step': 16598, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-10-01 04:33:28.724829', 'step': 16598, 'epoch': 3} {'type': 'loss', 'content': 0.05959397926926613, 'timestamp': '2025-10-01 04:33:28.731775', 'step': 16599, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:28.763686', 'step': 16599, 'epoch': 3} {'type': 'loss', 'content': 0.08513659238815308, 'timestamp': '2025-10-01 04:33:28.787397', 'step': 16600, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:28.817747', 'step': 16600, 'epoch': 3} {'type': 'loss', 'content': 0.058615922927856445, 'timestamp': '2025-10-01 04:33:28.819911', 'step': 16601, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:28.850136', 'step': 16601, 'epoch': 3} {'type': 'loss', 'content': 0.057970330119132996, 'timestamp': '2025-10-01 04:33:28.852545', 'step': 16602, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:33:28.882590', 'step': 16602, 'epoch': 3} {'type': 'loss', 'content': 0.041536249220371246, 'timestamp': '2025-10-01 04:33:28.884869', 'step': 16603, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:28.915871', 'step': 16603, 'epoch': 3} {'type': 'loss', 'content': 0.060891468077898026, 'timestamp': '2025-10-01 04:33:28.939620', 'step': 16604, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:28.970573', 'step': 16604, 'epoch': 3} {'type': 'loss', 'content': 0.1022864356637001, 'timestamp': '2025-10-01 04:33:28.972969', 'step': 16605, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:29.003504', 'step': 16605, 'epoch': 3} {'type': 'loss', 'content': 0.029674842953681946, 'timestamp': '2025-10-01 04:33:29.005709', 'step': 16606, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:29.036202', 'step': 16606, 'epoch': 3} {'type': 'loss', 'content': 0.12090972810983658, 'timestamp': '2025-10-01 04:33:29.040037', 'step': 16607, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:29.070462', 'step': 16607, 'epoch': 3} {'type': 'loss', 'content': 0.007436261046677828, 'timestamp': '2025-10-01 04:33:29.094256', 'step': 16608, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:29.124861', 'step': 16608, 'epoch': 3} {'type': 'loss', 'content': 0.04680240526795387, 'timestamp': '2025-10-01 04:33:29.127186', 'step': 16609, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:29.158159', 'step': 16609, 'epoch': 3} {'type': 'loss', 'content': 0.06809371709823608, 'timestamp': '2025-10-01 04:33:29.160442', 'step': 16610, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:33:29.190503', 'step': 16610, 'epoch': 3} {'type': 'loss', 'content': 0.08980736136436462, 'timestamp': '2025-10-01 04:33:29.192727', 'step': 16611, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:29.224079', 'step': 16611, 'epoch': 3} {'type': 'loss', 'content': 0.03871576115489006, 'timestamp': '2025-10-01 04:33:29.247751', 'step': 16612, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:33:29.279022', 'step': 16612, 'epoch': 3} {'type': 'loss', 'content': 0.12681852281093597, 'timestamp': '2025-10-01 04:33:29.281127', 'step': 16613, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:29.311500', 'step': 16613, 'epoch': 3} {'type': 'loss', 'content': 0.0642082691192627, 'timestamp': '2025-10-01 04:33:29.315208', 'step': 16614, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:33:29.346957', 'step': 16614, 'epoch': 3} {'type': 'loss', 'content': 0.09015233814716339, 'timestamp': '2025-10-01 04:33:29.349919', 'step': 16615, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:29.380624', 'step': 16615, 'epoch': 3} {'type': 'loss', 'content': 0.029692796990275383, 'timestamp': '2025-10-01 04:33:29.404213', 'step': 16616, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:33:29.434955', 'step': 16616, 'epoch': 3} {'type': 'loss', 'content': 0.06648784875869751, 'timestamp': '2025-10-01 04:33:29.437069', 'step': 16617, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:29.467192', 'step': 16617, 'epoch': 3} {'type': 'loss', 'content': 0.043713539838790894, 'timestamp': '2025-10-01 04:33:29.469441', 'step': 16618, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:33:29.500679', 'step': 16618, 'epoch': 3} {'type': 'loss', 'content': 0.03053947165608406, 'timestamp': '2025-10-01 04:33:29.503002', 'step': 16619, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:33:29.532962', 'step': 16619, 'epoch': 3} {'type': 'loss', 'content': 0.1559021770954132, 'timestamp': '2025-10-01 04:33:29.556622', 'step': 16620, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:33:29.588838', 'step': 16620, 'epoch': 3} {'type': 'loss', 'content': 0.022429604083299637, 'timestamp': '2025-10-01 04:33:29.591060', 'step': 16621, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:29.622289', 'step': 16621, 'epoch': 3} {'type': 'loss', 'content': 0.0378083735704422, 'timestamp': '2025-10-01 04:33:29.624644', 'step': 16622, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:29.655287', 'step': 16622, 'epoch': 3} {'type': 'loss', 'content': 0.01724194549024105, 'timestamp': '2025-10-01 04:33:29.669406', 'step': 16623, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:29.703753', 'step': 16623, 'epoch': 3} {'type': 'loss', 'content': 0.018796609714627266, 'timestamp': '2025-10-01 04:33:29.727427', 'step': 16624, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:29.757573', 'step': 16624, 'epoch': 3} {'type': 'loss', 'content': 0.13423354923725128, 'timestamp': '2025-10-01 04:33:29.760050', 'step': 16625, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:29.790526', 'step': 16625, 'epoch': 3} {'type': 'loss', 'content': 0.0825527012348175, 'timestamp': '2025-10-01 04:33:29.793736', 'step': 16626, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:33:29.826232', 'step': 16626, 'epoch': 3} {'type': 'loss', 'content': 0.026629406958818436, 'timestamp': '2025-10-01 04:33:29.829199', 'step': 16627, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:29.859437', 'step': 16627, 'epoch': 3} {'type': 'loss', 'content': 0.09521970897912979, 'timestamp': '2025-10-01 04:33:29.883058', 'step': 16628, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:33:29.914226', 'step': 16628, 'epoch': 3} {'type': 'loss', 'content': 0.039856523275375366, 'timestamp': '2025-10-01 04:33:29.916687', 'step': 16629, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:29.949061', 'step': 16629, 'epoch': 3} {'type': 'loss', 'content': 0.06731544435024261, 'timestamp': '2025-10-01 04:33:29.951206', 'step': 16630, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:33:29.982091', 'step': 16630, 'epoch': 3} {'type': 'loss', 'content': 0.1095191016793251, 'timestamp': '2025-10-01 04:33:29.986476', 'step': 16631, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:30.018225', 'step': 16631, 'epoch': 3} {'type': 'loss', 'content': 0.058301787823438644, 'timestamp': '2025-10-01 04:33:30.041870', 'step': 16632, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:30.071869', 'step': 16632, 'epoch': 3} {'type': 'loss', 'content': 0.0067033786326646805, 'timestamp': '2025-10-01 04:33:30.074030', 'step': 16633, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:30.103956', 'step': 16633, 'epoch': 3} {'type': 'loss', 'content': 0.11823537945747375, 'timestamp': '2025-10-01 04:33:30.106119', 'step': 16634, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:30.136458', 'step': 16634, 'epoch': 3} {'type': 'loss', 'content': 0.12443550676107407, 'timestamp': '2025-10-01 04:33:30.138565', 'step': 16635, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:30.168553', 'step': 16635, 'epoch': 3} {'type': 'loss', 'content': 0.023345187306404114, 'timestamp': '2025-10-01 04:33:30.192249', 'step': 16636, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:30.223387', 'step': 16636, 'epoch': 3} {'type': 'loss', 'content': 0.027548331767320633, 'timestamp': '2025-10-01 04:33:30.225643', 'step': 16637, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:30.256090', 'step': 16637, 'epoch': 3} {'type': 'loss', 'content': 0.07574424147605896, 'timestamp': '2025-10-01 04:33:30.258473', 'step': 16638, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:30.304367', 'step': 16638, 'epoch': 3} {'type': 'loss', 'content': 0.1544182300567627, 'timestamp': '2025-10-01 04:33:30.306469', 'step': 16639, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:30.336857', 'step': 16639, 'epoch': 3} {'type': 'loss', 'content': 0.04559990018606186, 'timestamp': '2025-10-01 04:33:30.360553', 'step': 16640, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:30.391042', 'step': 16640, 'epoch': 3} {'type': 'loss', 'content': 0.019074568524956703, 'timestamp': '2025-10-01 04:33:30.394193', 'step': 16641, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:30.438562', 'step': 16641, 'epoch': 3} {'type': 'loss', 'content': 0.07525722682476044, 'timestamp': '2025-10-01 04:33:30.440747', 'step': 16642, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:33:30.471748', 'step': 16642, 'epoch': 3} {'type': 'loss', 'content': 0.13136479258537292, 'timestamp': '2025-10-01 04:33:30.474022', 'step': 16643, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:30.504125', 'step': 16643, 'epoch': 3} {'type': 'loss', 'content': 0.05774303153157234, 'timestamp': '2025-10-01 04:33:30.527839', 'step': 16644, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:33:30.558064', 'step': 16644, 'epoch': 3} {'type': 'loss', 'content': 0.030274974182248116, 'timestamp': '2025-10-01 04:33:30.560345', 'step': 16645, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:30.591014', 'step': 16645, 'epoch': 3} {'type': 'loss', 'content': 0.07692283391952515, 'timestamp': '2025-10-01 04:33:30.593195', 'step': 16646, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:30.624043', 'step': 16646, 'epoch': 3} {'type': 'loss', 'content': 0.08591233193874359, 'timestamp': '2025-10-01 04:33:30.626201', 'step': 16647, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:30.656420', 'step': 16647, 'epoch': 3} {'type': 'loss', 'content': 0.03351563215255737, 'timestamp': '2025-10-01 04:33:30.680066', 'step': 16648, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:30.711293', 'step': 16648, 'epoch': 3} {'type': 'loss', 'content': 0.11449949443340302, 'timestamp': '2025-10-01 04:33:30.713405', 'step': 16649, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:30.745087', 'step': 16649, 'epoch': 3} {'type': 'loss', 'content': 0.04874426871538162, 'timestamp': '2025-10-01 04:33:30.747215', 'step': 16650, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:30.777691', 'step': 16650, 'epoch': 3} {'type': 'loss', 'content': 0.037337131798267365, 'timestamp': '2025-10-01 04:33:30.780045', 'step': 16651, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:30.810558', 'step': 16651, 'epoch': 3} {'type': 'loss', 'content': 0.07590334862470627, 'timestamp': '2025-10-01 04:33:30.834199', 'step': 16652, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:33:30.864951', 'step': 16652, 'epoch': 3} {'type': 'loss', 'content': 0.13345476984977722, 'timestamp': '2025-10-01 04:33:30.867154', 'step': 16653, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:30.897400', 'step': 16653, 'epoch': 3} {'type': 'loss', 'content': 0.07784879207611084, 'timestamp': '2025-10-01 04:33:30.899775', 'step': 16654, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:30.931796', 'step': 16654, 'epoch': 3} {'type': 'loss', 'content': 0.08254358172416687, 'timestamp': '2025-10-01 04:33:30.934389', 'step': 16655, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:30.965986', 'step': 16655, 'epoch': 3} {'type': 'loss', 'content': 0.07100814580917358, 'timestamp': '2025-10-01 04:33:30.989712', 'step': 16656, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:31.021398', 'step': 16656, 'epoch': 3} {'type': 'loss', 'content': 0.0406830832362175, 'timestamp': '2025-10-01 04:33:31.023716', 'step': 16657, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:33:31.055922', 'step': 16657, 'epoch': 3} {'type': 'loss', 'content': 0.0960547924041748, 'timestamp': '2025-10-01 04:33:31.058401', 'step': 16658, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:31.089753', 'step': 16658, 'epoch': 3} {'type': 'loss', 'content': 0.11265040189027786, 'timestamp': '2025-10-01 04:33:31.091903', 'step': 16659, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:31.122383', 'step': 16659, 'epoch': 3} {'type': 'loss', 'content': 0.1183265671133995, 'timestamp': '2025-10-01 04:33:31.146146', 'step': 16660, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:31.177287', 'step': 16660, 'epoch': 3} {'type': 'loss', 'content': 0.061037421226501465, 'timestamp': '2025-10-01 04:33:31.179412', 'step': 16661, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:31.210613', 'step': 16661, 'epoch': 3} {'type': 'loss', 'content': 0.02641802653670311, 'timestamp': '2025-10-01 04:33:31.212584', 'step': 16662, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:31.243349', 'step': 16662, 'epoch': 3} {'type': 'loss', 'content': 0.06943982094526291, 'timestamp': '2025-10-01 04:33:31.245479', 'step': 16663, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:31.275883', 'step': 16663, 'epoch': 3} {'type': 'loss', 'content': 0.0528654120862484, 'timestamp': '2025-10-01 04:33:31.299558', 'step': 16664, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:31.330802', 'step': 16664, 'epoch': 3} {'type': 'loss', 'content': 0.12837964296340942, 'timestamp': '2025-10-01 04:33:31.333149', 'step': 16665, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:31.363679', 'step': 16665, 'epoch': 3} {'type': 'loss', 'content': 0.0753779411315918, 'timestamp': '2025-10-01 04:33:31.365899', 'step': 16666, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:31.396576', 'step': 16666, 'epoch': 3} {'type': 'loss', 'content': 0.11518923193216324, 'timestamp': '2025-10-01 04:33:31.398823', 'step': 16667, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:31.429025', 'step': 16667, 'epoch': 3} {'type': 'loss', 'content': 0.04585081711411476, 'timestamp': '2025-10-01 04:33:31.452716', 'step': 16668, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:31.483211', 'step': 16668, 'epoch': 3} {'type': 'loss', 'content': 0.01591002568602562, 'timestamp': '2025-10-01 04:33:31.485367', 'step': 16669, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:31.515515', 'step': 16669, 'epoch': 3} {'type': 'loss', 'content': 0.08860558271408081, 'timestamp': '2025-10-01 04:33:31.518087', 'step': 16670, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:31.548679', 'step': 16670, 'epoch': 3} {'type': 'loss', 'content': 0.12625256180763245, 'timestamp': '2025-10-01 04:33:31.550745', 'step': 16671, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:31.582101', 'step': 16671, 'epoch': 3} {'type': 'loss', 'content': 0.10563474893569946, 'timestamp': '2025-10-01 04:33:31.605654', 'step': 16672, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:31.636409', 'step': 16672, 'epoch': 3} {'type': 'loss', 'content': 0.07716178894042969, 'timestamp': '2025-10-01 04:33:31.638567', 'step': 16673, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:31.669173', 'step': 16673, 'epoch': 3} {'type': 'loss', 'content': 0.08936890959739685, 'timestamp': '2025-10-01 04:33:31.671623', 'step': 16674, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:31.702257', 'step': 16674, 'epoch': 3} {'type': 'loss', 'content': 0.02700178325176239, 'timestamp': '2025-10-01 04:33:31.704413', 'step': 16675, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:31.739649', 'step': 16675, 'epoch': 3} {'type': 'loss', 'content': 0.035868190228939056, 'timestamp': '2025-10-01 04:33:31.777504', 'step': 16676, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:31.809879', 'step': 16676, 'epoch': 3} {'type': 'loss', 'content': 0.06327330321073532, 'timestamp': '2025-10-01 04:33:31.812065', 'step': 16677, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:31.842185', 'step': 16677, 'epoch': 3} {'type': 'loss', 'content': 0.04037605598568916, 'timestamp': '2025-10-01 04:33:31.844306', 'step': 16678, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:31.875748', 'step': 16678, 'epoch': 3} {'type': 'loss', 'content': 0.037693776190280914, 'timestamp': '2025-10-01 04:33:31.877874', 'step': 16679, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:33:31.908452', 'step': 16679, 'epoch': 3} {'type': 'loss', 'content': 0.07789221405982971, 'timestamp': '2025-10-01 04:33:31.932328', 'step': 16680, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:31.964102', 'step': 16680, 'epoch': 3} {'type': 'loss', 'content': 0.06686428934335709, 'timestamp': '2025-10-01 04:33:31.966633', 'step': 16681, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:31.998405', 'step': 16681, 'epoch': 3} {'type': 'loss', 'content': 0.036316294223070145, 'timestamp': '2025-10-01 04:33:32.001184', 'step': 16682, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:32.034239', 'step': 16682, 'epoch': 3} {'type': 'loss', 'content': 0.07376138120889664, 'timestamp': '2025-10-01 04:33:32.036926', 'step': 16683, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:32.069614', 'step': 16683, 'epoch': 3} {'type': 'loss', 'content': 0.12003467231988907, 'timestamp': '2025-10-01 04:33:32.093983', 'step': 16684, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:32.126367', 'step': 16684, 'epoch': 3} {'type': 'loss', 'content': 0.09061817824840546, 'timestamp': '2025-10-01 04:33:32.128672', 'step': 16685, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:33:32.159208', 'step': 16685, 'epoch': 3} {'type': 'loss', 'content': 0.049670394510030746, 'timestamp': '2025-10-01 04:33:32.162138', 'step': 16686, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:32.197379', 'step': 16686, 'epoch': 3} {'type': 'loss', 'content': 0.08404530584812164, 'timestamp': '2025-10-01 04:33:32.199548', 'step': 16687, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:32.231525', 'step': 16687, 'epoch': 3} {'type': 'loss', 'content': 0.06937042623758316, 'timestamp': '2025-10-01 04:33:32.255711', 'step': 16688, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:32.286184', 'step': 16688, 'epoch': 3} {'type': 'loss', 'content': 0.04388539120554924, 'timestamp': '2025-10-01 04:33:32.288496', 'step': 16689, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:32.319000', 'step': 16689, 'epoch': 3} {'type': 'loss', 'content': 0.05596354976296425, 'timestamp': '2025-10-01 04:33:32.320777', 'step': 16690, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:32.352031', 'step': 16690, 'epoch': 3} {'type': 'loss', 'content': 0.05341384559869766, 'timestamp': '2025-10-01 04:33:32.354352', 'step': 16691, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:32.385070', 'step': 16691, 'epoch': 3} {'type': 'loss', 'content': 0.05301256850361824, 'timestamp': '2025-10-01 04:33:32.408749', 'step': 16692, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:32.438962', 'step': 16692, 'epoch': 3} {'type': 'loss', 'content': 0.165129616856575, 'timestamp': '2025-10-01 04:33:32.440988', 'step': 16693, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:33:32.471700', 'step': 16693, 'epoch': 3} {'type': 'loss', 'content': 0.15050122141838074, 'timestamp': '2025-10-01 04:33:32.474222', 'step': 16694, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:32.505433', 'step': 16694, 'epoch': 3} {'type': 'loss', 'content': 0.13093696534633636, 'timestamp': '2025-10-01 04:33:32.508172', 'step': 16695, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:32.539624', 'step': 16695, 'epoch': 3} {'type': 'loss', 'content': 0.13631723821163177, 'timestamp': '2025-10-01 04:33:32.563022', 'step': 16696, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:32.607858', 'step': 16696, 'epoch': 3} {'type': 'loss', 'content': 0.08136576414108276, 'timestamp': '2025-10-01 04:33:32.610265', 'step': 16697, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:32.656101', 'step': 16697, 'epoch': 3} {'type': 'loss', 'content': 0.06442346423864365, 'timestamp': '2025-10-01 04:33:32.658025', 'step': 16698, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:32.689883', 'step': 16698, 'epoch': 3} {'type': 'loss', 'content': 0.06689903885126114, 'timestamp': '2025-10-01 04:33:32.691984', 'step': 16699, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:33:32.723068', 'step': 16699, 'epoch': 3} {'type': 'loss', 'content': 0.0509466677904129, 'timestamp': '2025-10-01 04:33:32.746674', 'step': 16700, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:32.781359', 'step': 16700, 'epoch': 3} {'type': 'loss', 'content': 0.08294059336185455, 'timestamp': '2025-10-01 04:33:32.783156', 'step': 16701, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:32.814933', 'step': 16701, 'epoch': 3} {'type': 'loss', 'content': 0.07465587556362152, 'timestamp': '2025-10-01 04:33:32.817157', 'step': 16702, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:32.850531', 'step': 16702, 'epoch': 3} {'type': 'loss', 'content': 0.026746870949864388, 'timestamp': '2025-10-01 04:33:32.852946', 'step': 16703, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:32.883057', 'step': 16703, 'epoch': 3} {'type': 'loss', 'content': 0.07147985696792603, 'timestamp': '2025-10-01 04:33:32.906518', 'step': 16704, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:33:32.941239', 'step': 16704, 'epoch': 3} {'type': 'loss', 'content': 0.09405489265918732, 'timestamp': '2025-10-01 04:33:32.943575', 'step': 16705, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:32.984574', 'step': 16705, 'epoch': 3} {'type': 'loss', 'content': 0.03588823229074478, 'timestamp': '2025-10-01 04:33:33.003384', 'step': 16706, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:33.035708', 'step': 16706, 'epoch': 3} {'type': 'loss', 'content': 0.06442379206418991, 'timestamp': '2025-10-01 04:33:33.037542', 'step': 16707, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:33.071464', 'step': 16707, 'epoch': 3} {'type': 'loss', 'content': 0.06160891801118851, 'timestamp': '2025-10-01 04:33:33.095836', 'step': 16708, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:33.126624', 'step': 16708, 'epoch': 3} {'type': 'loss', 'content': 0.0437895767390728, 'timestamp': '2025-10-01 04:33:33.129177', 'step': 16709, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:33.159368', 'step': 16709, 'epoch': 3} {'type': 'loss', 'content': 0.07435940206050873, 'timestamp': '2025-10-01 04:33:33.161726', 'step': 16710, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:33.194687', 'step': 16710, 'epoch': 3} {'type': 'loss', 'content': 0.06500612944364548, 'timestamp': '2025-10-01 04:33:33.196745', 'step': 16711, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:33.231913', 'step': 16711, 'epoch': 3} {'type': 'loss', 'content': 0.07817501574754715, 'timestamp': '2025-10-01 04:33:33.255542', 'step': 16712, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:33.287368', 'step': 16712, 'epoch': 3} {'type': 'loss', 'content': 0.05739909037947655, 'timestamp': '2025-10-01 04:33:33.289515', 'step': 16713, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:33.321013', 'step': 16713, 'epoch': 3} {'type': 'loss', 'content': 0.11075378954410553, 'timestamp': '2025-10-01 04:33:33.323352', 'step': 16714, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:33.353856', 'step': 16714, 'epoch': 3} {'type': 'loss', 'content': 0.05588928982615471, 'timestamp': '2025-10-01 04:33:33.356159', 'step': 16715, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:33.391624', 'step': 16715, 'epoch': 3} {'type': 'loss', 'content': 0.12245520949363708, 'timestamp': '2025-10-01 04:33:33.415183', 'step': 16716, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:33.453274', 'step': 16716, 'epoch': 3} {'type': 'loss', 'content': 0.16492922604084015, 'timestamp': '2025-10-01 04:33:33.469923', 'step': 16717, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:33:33.501218', 'step': 16717, 'epoch': 3} {'type': 'loss', 'content': 0.1502302587032318, 'timestamp': '2025-10-01 04:33:33.504558', 'step': 16718, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:33.545622', 'step': 16718, 'epoch': 3} {'type': 'loss', 'content': 0.10372241586446762, 'timestamp': '2025-10-01 04:33:33.576059', 'step': 16719, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:33.611171', 'step': 16719, 'epoch': 3} {'type': 'loss', 'content': 0.046194568276405334, 'timestamp': '2025-10-01 04:33:33.635945', 'step': 16720, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:33.667300', 'step': 16720, 'epoch': 3} {'type': 'loss', 'content': 0.07828054577112198, 'timestamp': '2025-10-01 04:33:33.672893', 'step': 16721, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:33.705462', 'step': 16721, 'epoch': 3} {'type': 'loss', 'content': 0.06696008890867233, 'timestamp': '2025-10-01 04:33:33.707814', 'step': 16722, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:33.758735', 'step': 16722, 'epoch': 3} {'type': 'loss', 'content': 0.06620878726243973, 'timestamp': '2025-10-01 04:33:33.761948', 'step': 16723, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:33.793447', 'step': 16723, 'epoch': 3} {'type': 'loss', 'content': 0.10604556649923325, 'timestamp': '2025-10-01 04:33:33.824411', 'step': 16724, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:33:33.855088', 'step': 16724, 'epoch': 3} {'type': 'loss', 'content': 0.12477172911167145, 'timestamp': '2025-10-01 04:33:33.870752', 'step': 16725, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:33.901899', 'step': 16725, 'epoch': 3} {'type': 'loss', 'content': 0.13015328347682953, 'timestamp': '2025-10-01 04:33:33.905378', 'step': 16726, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:33.937513', 'step': 16726, 'epoch': 3} {'type': 'loss', 'content': 0.07804127782583237, 'timestamp': '2025-10-01 04:33:33.940527', 'step': 16727, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:33:33.979227', 'step': 16727, 'epoch': 3} {'type': 'loss', 'content': 0.06113160401582718, 'timestamp': '2025-10-01 04:33:34.022428', 'step': 16728, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:34.052783', 'step': 16728, 'epoch': 3} {'type': 'loss', 'content': 0.06180797889828682, 'timestamp': '2025-10-01 04:33:34.055407', 'step': 16729, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-10-01 04:33:34.088060', 'step': 16729, 'epoch': 3} {'type': 'loss', 'content': 0.11977922916412354, 'timestamp': '2025-10-01 04:33:34.093481', 'step': 16730, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:34.127206', 'step': 16730, 'epoch': 3} {'type': 'loss', 'content': 0.02672388218343258, 'timestamp': '2025-10-01 04:33:34.130267', 'step': 16731, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:34.162584', 'step': 16731, 'epoch': 3} {'type': 'loss', 'content': 0.07153098285198212, 'timestamp': '2025-10-01 04:33:34.193673', 'step': 16732, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:34.237928', 'step': 16732, 'epoch': 3} {'type': 'loss', 'content': 0.06447898596525192, 'timestamp': '2025-10-01 04:33:34.252504', 'step': 16733, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-10-01 04:33:34.292799', 'step': 16733, 'epoch': 3} {'type': 'loss', 'content': 0.0602620467543602, 'timestamp': '2025-10-01 04:33:34.297476', 'step': 16734, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:34.327817', 'step': 16734, 'epoch': 3} {'type': 'loss', 'content': 0.041781507432460785, 'timestamp': '2025-10-01 04:33:34.331322', 'step': 16735, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:34.368211', 'step': 16735, 'epoch': 3} {'type': 'loss', 'content': 0.08379293978214264, 'timestamp': '2025-10-01 04:33:34.413303', 'step': 16736, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:34.450779', 'step': 16736, 'epoch': 3} {'type': 'loss', 'content': 0.08707206696271896, 'timestamp': '2025-10-01 04:33:34.453226', 'step': 16737, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:34.490759', 'step': 16737, 'epoch': 3} {'type': 'loss', 'content': 0.03218226507306099, 'timestamp': '2025-10-01 04:33:34.505198', 'step': 16738, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:34.544478', 'step': 16738, 'epoch': 3} {'type': 'loss', 'content': 0.1276562511920929, 'timestamp': '2025-10-01 04:33:34.548577', 'step': 16739, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:34.582057', 'step': 16739, 'epoch': 3} {'type': 'loss', 'content': 0.11011745035648346, 'timestamp': '2025-10-01 04:33:34.607315', 'step': 16740, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:34.638730', 'step': 16740, 'epoch': 3} {'type': 'loss', 'content': 0.02445073053240776, 'timestamp': '2025-10-01 04:33:34.650731', 'step': 16741, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:34.699546', 'step': 16741, 'epoch': 3} {'type': 'loss', 'content': 0.08606027066707611, 'timestamp': '2025-10-01 04:33:34.701891', 'step': 16742, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:34.734508', 'step': 16742, 'epoch': 3} {'type': 'loss', 'content': 0.08132857829332352, 'timestamp': '2025-10-01 04:33:34.744697', 'step': 16743, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:34.786863', 'step': 16743, 'epoch': 3} {'type': 'loss', 'content': 0.0669160783290863, 'timestamp': '2025-10-01 04:33:34.810983', 'step': 16744, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:33:34.845095', 'step': 16744, 'epoch': 3} {'type': 'loss', 'content': 0.08053820580244064, 'timestamp': '2025-10-01 04:33:34.847361', 'step': 16745, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:34.879429', 'step': 16745, 'epoch': 3} {'type': 'loss', 'content': 0.05450580641627312, 'timestamp': '2025-10-01 04:33:34.881559', 'step': 16746, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:34.921496', 'step': 16746, 'epoch': 3} {'type': 'loss', 'content': 0.10922715067863464, 'timestamp': '2025-10-01 04:33:34.923738', 'step': 16747, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:34.957447', 'step': 16747, 'epoch': 3} {'type': 'loss', 'content': 0.03864072263240814, 'timestamp': '2025-10-01 04:33:34.981568', 'step': 16748, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:35.014091', 'step': 16748, 'epoch': 3} {'type': 'loss', 'content': 0.07705990225076675, 'timestamp': '2025-10-01 04:33:35.016990', 'step': 16749, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:35.048840', 'step': 16749, 'epoch': 3} {'type': 'loss', 'content': 0.033561792224645615, 'timestamp': '2025-10-01 04:33:35.051479', 'step': 16750, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:35.082100', 'step': 16750, 'epoch': 3} {'type': 'loss', 'content': 0.07762528955936432, 'timestamp': '2025-10-01 04:33:35.084047', 'step': 16751, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:35.114993', 'step': 16751, 'epoch': 3} {'type': 'loss', 'content': 0.10525784641504288, 'timestamp': '2025-10-01 04:33:35.138921', 'step': 16752, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:33:35.176896', 'step': 16752, 'epoch': 3} {'type': 'loss', 'content': 0.09106110781431198, 'timestamp': '2025-10-01 04:33:35.179265', 'step': 16753, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:35.209631', 'step': 16753, 'epoch': 3} {'type': 'loss', 'content': 0.09220114350318909, 'timestamp': '2025-10-01 04:33:35.211780', 'step': 16754, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:35.254408', 'step': 16754, 'epoch': 3} {'type': 'loss', 'content': 0.054547082632780075, 'timestamp': '2025-10-01 04:33:35.256899', 'step': 16755, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:35.287849', 'step': 16755, 'epoch': 3} {'type': 'loss', 'content': 0.11445829272270203, 'timestamp': '2025-10-01 04:33:35.311583', 'step': 16756, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:35.343294', 'step': 16756, 'epoch': 3} {'type': 'loss', 'content': 0.018794197589159012, 'timestamp': '2025-10-01 04:33:35.345634', 'step': 16757, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:33:35.391460', 'step': 16757, 'epoch': 3} {'type': 'loss', 'content': 0.12689344584941864, 'timestamp': '2025-10-01 04:33:35.393579', 'step': 16758, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:35.431542', 'step': 16758, 'epoch': 3} {'type': 'loss', 'content': 0.10175061225891113, 'timestamp': '2025-10-01 04:33:35.434144', 'step': 16759, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:35.464976', 'step': 16759, 'epoch': 3} {'type': 'loss', 'content': 0.12417879700660706, 'timestamp': '2025-10-01 04:33:35.488745', 'step': 16760, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:35.530014', 'step': 16760, 'epoch': 3} {'type': 'loss', 'content': 0.030384978279471397, 'timestamp': '2025-10-01 04:33:35.532095', 'step': 16761, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:35.563302', 'step': 16761, 'epoch': 3} {'type': 'loss', 'content': 0.0488327294588089, 'timestamp': '2025-10-01 04:33:35.565459', 'step': 16762, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:35.596252', 'step': 16762, 'epoch': 3} {'type': 'loss', 'content': 0.05265238136053085, 'timestamp': '2025-10-01 04:33:35.598410', 'step': 16763, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:35.629547', 'step': 16763, 'epoch': 3} {'type': 'loss', 'content': 0.047813884913921356, 'timestamp': '2025-10-01 04:33:35.653279', 'step': 16764, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:35.684458', 'step': 16764, 'epoch': 3} {'type': 'loss', 'content': 0.07535948604345322, 'timestamp': '2025-10-01 04:33:35.687784', 'step': 16765, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:35.718078', 'step': 16765, 'epoch': 3} {'type': 'loss', 'content': 0.06603114306926727, 'timestamp': '2025-10-01 04:33:35.720583', 'step': 16766, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:33:35.752578', 'step': 16766, 'epoch': 3} {'type': 'loss', 'content': 0.0698183998465538, 'timestamp': '2025-10-01 04:33:35.755308', 'step': 16767, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:35.786326', 'step': 16767, 'epoch': 3} {'type': 'loss', 'content': 0.07069506496191025, 'timestamp': '2025-10-01 04:33:35.810244', 'step': 16768, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:35.841196', 'step': 16768, 'epoch': 3} {'type': 'loss', 'content': 0.08085046708583832, 'timestamp': '2025-10-01 04:33:35.843351', 'step': 16769, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:35.873908', 'step': 16769, 'epoch': 3} {'type': 'loss', 'content': 0.059498731046915054, 'timestamp': '2025-10-01 04:33:35.876022', 'step': 16770, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:35.906450', 'step': 16770, 'epoch': 3} {'type': 'loss', 'content': 0.033200766891241074, 'timestamp': '2025-10-01 04:33:35.908567', 'step': 16771, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:35.938692', 'step': 16771, 'epoch': 3} {'type': 'loss', 'content': 0.0816589817404747, 'timestamp': '2025-10-01 04:33:35.962359', 'step': 16772, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:35.994577', 'step': 16772, 'epoch': 3} {'type': 'loss', 'content': 0.09300302714109421, 'timestamp': '2025-10-01 04:33:35.996692', 'step': 16773, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:36.027532', 'step': 16773, 'epoch': 3} {'type': 'loss', 'content': 0.06508678197860718, 'timestamp': '2025-10-01 04:33:36.030633', 'step': 16774, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:36.064349', 'step': 16774, 'epoch': 3} {'type': 'loss', 'content': 0.06207114830613136, 'timestamp': '2025-10-01 04:33:36.066627', 'step': 16775, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:36.097560', 'step': 16775, 'epoch': 3} {'type': 'loss', 'content': 0.057822924107313156, 'timestamp': '2025-10-01 04:33:36.122201', 'step': 16776, 'epoch': 3} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 949202279808}], 'timestamp': '2025-10-01 04:33:45.108336', 'step': 16776, 'epoch': 3} {'type': 'pplx', 'content': 11269.449282188876, 'timestamp': '2025-10-01 04:33:45.111189', 'step': 16776, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:45.150130', 'step': 16776, 'epoch': 3} {'type': 'loss', 'content': 0.06376625597476959, 'timestamp': '2025-10-01 04:33:45.153490', 'step': 16777, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:45.185208', 'step': 16777, 'epoch': 3} {'type': 'loss', 'content': 0.05340426042675972, 'timestamp': '2025-10-01 04:33:45.188106', 'step': 16778, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:45.219013', 'step': 16778, 'epoch': 3} {'type': 'loss', 'content': 0.05493370443582535, 'timestamp': '2025-10-01 04:33:45.221442', 'step': 16779, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:45.252076', 'step': 16779, 'epoch': 3} {'type': 'loss', 'content': 0.08633343875408173, 'timestamp': '2025-10-01 04:33:45.275893', 'step': 16780, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:45.306641', 'step': 16780, 'epoch': 3} {'type': 'loss', 'content': 0.12592138350009918, 'timestamp': '2025-10-01 04:33:45.308754', 'step': 16781, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:45.342973', 'step': 16781, 'epoch': 3} {'type': 'loss', 'content': 0.07545109838247299, 'timestamp': '2025-10-01 04:33:45.345562', 'step': 16782, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:33:45.378389', 'step': 16782, 'epoch': 3} {'type': 'loss', 'content': 0.0843510702252388, 'timestamp': '2025-10-01 04:33:45.380820', 'step': 16783, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:45.412556', 'step': 16783, 'epoch': 3} {'type': 'loss', 'content': 0.13656164705753326, 'timestamp': '2025-10-01 04:33:45.436291', 'step': 16784, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:45.466974', 'step': 16784, 'epoch': 3} {'type': 'loss', 'content': 0.08489494025707245, 'timestamp': '2025-10-01 04:33:45.469253', 'step': 16785, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:45.500541', 'step': 16785, 'epoch': 3} {'type': 'loss', 'content': 0.058811791241168976, 'timestamp': '2025-10-01 04:33:45.503688', 'step': 16786, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:45.534368', 'step': 16786, 'epoch': 3} {'type': 'loss', 'content': 0.08704857528209686, 'timestamp': '2025-10-01 04:33:45.536624', 'step': 16787, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:45.581601', 'step': 16787, 'epoch': 3} {'type': 'loss', 'content': 0.11012541502714157, 'timestamp': '2025-10-01 04:33:45.605456', 'step': 16788, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:45.635513', 'step': 16788, 'epoch': 3} {'type': 'loss', 'content': 0.13107533752918243, 'timestamp': '2025-10-01 04:33:45.638573', 'step': 16789, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:45.669116', 'step': 16789, 'epoch': 3} {'type': 'loss', 'content': 0.051484860479831696, 'timestamp': '2025-10-01 04:33:45.671597', 'step': 16790, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:45.703550', 'step': 16790, 'epoch': 3} {'type': 'loss', 'content': 0.1122412160038948, 'timestamp': '2025-10-01 04:33:45.705845', 'step': 16791, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:45.737037', 'step': 16791, 'epoch': 3} {'type': 'loss', 'content': 0.04792605713009834, 'timestamp': '2025-10-01 04:33:45.760723', 'step': 16792, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:45.792618', 'step': 16792, 'epoch': 3} {'type': 'loss', 'content': 0.10403549671173096, 'timestamp': '2025-10-01 04:33:45.795182', 'step': 16793, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:45.825530', 'step': 16793, 'epoch': 3} {'type': 'loss', 'content': 0.016676682978868484, 'timestamp': '2025-10-01 04:33:45.827703', 'step': 16794, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:45.857998', 'step': 16794, 'epoch': 3} {'type': 'loss', 'content': 0.03898769989609718, 'timestamp': '2025-10-01 04:33:45.860550', 'step': 16795, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:45.891622', 'step': 16795, 'epoch': 3} {'type': 'loss', 'content': 0.06124916300177574, 'timestamp': '2025-10-01 04:33:45.916292', 'step': 16796, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:33:45.946813', 'step': 16796, 'epoch': 3} {'type': 'loss', 'content': 0.08030522614717484, 'timestamp': '2025-10-01 04:33:45.949179', 'step': 16797, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:45.987046', 'step': 16797, 'epoch': 3} {'type': 'loss', 'content': 0.034187812358140945, 'timestamp': '2025-10-01 04:33:45.989367', 'step': 16798, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:46.032484', 'step': 16798, 'epoch': 3} {'type': 'loss', 'content': 0.026702305302023888, 'timestamp': '2025-10-01 04:33:46.034685', 'step': 16799, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:46.065405', 'step': 16799, 'epoch': 3} {'type': 'loss', 'content': 0.07403920590877533, 'timestamp': '2025-10-01 04:33:46.089084', 'step': 16800, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:46.119350', 'step': 16800, 'epoch': 3} {'type': 'loss', 'content': 0.07907917350530624, 'timestamp': '2025-10-01 04:33:46.122015', 'step': 16801, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:46.152077', 'step': 16801, 'epoch': 3} {'type': 'loss', 'content': 0.027744214981794357, 'timestamp': '2025-10-01 04:33:46.155204', 'step': 16802, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:46.185733', 'step': 16802, 'epoch': 3} {'type': 'loss', 'content': 0.10280662029981613, 'timestamp': '2025-10-01 04:33:46.187886', 'step': 16803, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:46.218119', 'step': 16803, 'epoch': 3} {'type': 'loss', 'content': 0.04893514886498451, 'timestamp': '2025-10-01 04:33:46.241954', 'step': 16804, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:46.275214', 'step': 16804, 'epoch': 3} {'type': 'loss', 'content': 0.029658427461981773, 'timestamp': '2025-10-01 04:33:46.277918', 'step': 16805, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:46.308780', 'step': 16805, 'epoch': 3} {'type': 'loss', 'content': 0.04948705807328224, 'timestamp': '2025-10-01 04:33:46.311469', 'step': 16806, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:33:46.343029', 'step': 16806, 'epoch': 3} {'type': 'loss', 'content': 0.04478529840707779, 'timestamp': '2025-10-01 04:33:46.345336', 'step': 16807, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:33:46.377913', 'step': 16807, 'epoch': 3} {'type': 'loss', 'content': 0.09498875588178635, 'timestamp': '2025-10-01 04:33:46.402235', 'step': 16808, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:46.433092', 'step': 16808, 'epoch': 3} {'type': 'loss', 'content': 0.04986631125211716, 'timestamp': '2025-10-01 04:33:46.435407', 'step': 16809, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:46.466472', 'step': 16809, 'epoch': 3} {'type': 'loss', 'content': 0.0694199949502945, 'timestamp': '2025-10-01 04:33:46.468786', 'step': 16810, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:33:46.507342', 'step': 16810, 'epoch': 3} {'type': 'loss', 'content': 0.03533368930220604, 'timestamp': '2025-10-01 04:33:46.510328', 'step': 16811, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:46.540559', 'step': 16811, 'epoch': 3} {'type': 'loss', 'content': 0.031224090605974197, 'timestamp': '2025-10-01 04:33:46.564358', 'step': 16812, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:46.594845', 'step': 16812, 'epoch': 3} {'type': 'loss', 'content': 0.032935649156570435, 'timestamp': '2025-10-01 04:33:46.598630', 'step': 16813, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:46.632251', 'step': 16813, 'epoch': 3} {'type': 'loss', 'content': 0.057050671428442, 'timestamp': '2025-10-01 04:33:46.634620', 'step': 16814, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:33:46.666482', 'step': 16814, 'epoch': 3} {'type': 'loss', 'content': 0.07522199302911758, 'timestamp': '2025-10-01 04:33:46.668981', 'step': 16815, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:46.699136', 'step': 16815, 'epoch': 3} {'type': 'loss', 'content': 0.09564263373613358, 'timestamp': '2025-10-01 04:33:46.723216', 'step': 16816, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:46.753678', 'step': 16816, 'epoch': 3} {'type': 'loss', 'content': 0.04263490438461304, 'timestamp': '2025-10-01 04:33:46.755942', 'step': 16817, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:46.786804', 'step': 16817, 'epoch': 3} {'type': 'loss', 'content': 0.032458771020174026, 'timestamp': '2025-10-01 04:33:46.789066', 'step': 16818, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:46.820254', 'step': 16818, 'epoch': 3} {'type': 'loss', 'content': 0.07370637357234955, 'timestamp': '2025-10-01 04:33:46.822550', 'step': 16819, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:46.853000', 'step': 16819, 'epoch': 3} {'type': 'loss', 'content': 0.04419294372200966, 'timestamp': '2025-10-01 04:33:46.876845', 'step': 16820, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:46.908155', 'step': 16820, 'epoch': 3} {'type': 'loss', 'content': 0.07008393108844757, 'timestamp': '2025-10-01 04:33:46.910430', 'step': 16821, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:46.940910', 'step': 16821, 'epoch': 3} {'type': 'loss', 'content': 0.0904824361205101, 'timestamp': '2025-10-01 04:33:46.943399', 'step': 16822, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:46.974159', 'step': 16822, 'epoch': 3} {'type': 'loss', 'content': 0.0926586240530014, 'timestamp': '2025-10-01 04:33:46.976400', 'step': 16823, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:47.014901', 'step': 16823, 'epoch': 3} {'type': 'loss', 'content': 0.16010500490665436, 'timestamp': '2025-10-01 04:33:47.038572', 'step': 16824, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:47.076693', 'step': 16824, 'epoch': 3} {'type': 'loss', 'content': 0.025477824732661247, 'timestamp': '2025-10-01 04:33:47.079205', 'step': 16825, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:47.109629', 'step': 16825, 'epoch': 3} {'type': 'loss', 'content': 0.09895583987236023, 'timestamp': '2025-10-01 04:33:47.112017', 'step': 16826, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:47.142386', 'step': 16826, 'epoch': 3} {'type': 'loss', 'content': 0.0978267714381218, 'timestamp': '2025-10-01 04:33:47.144748', 'step': 16827, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:47.177038', 'step': 16827, 'epoch': 3} {'type': 'loss', 'content': 0.0745813399553299, 'timestamp': '2025-10-01 04:33:47.200820', 'step': 16828, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:47.231314', 'step': 16828, 'epoch': 3} {'type': 'loss', 'content': 0.08347900211811066, 'timestamp': '2025-10-01 04:33:47.233988', 'step': 16829, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:33:47.265015', 'step': 16829, 'epoch': 3} {'type': 'loss', 'content': 0.061903972178697586, 'timestamp': '2025-10-01 04:33:47.267552', 'step': 16830, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:47.298307', 'step': 16830, 'epoch': 3} {'type': 'loss', 'content': 0.06813501566648483, 'timestamp': '2025-10-01 04:33:47.300875', 'step': 16831, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:47.332508', 'step': 16831, 'epoch': 3} {'type': 'loss', 'content': 0.060731858015060425, 'timestamp': '2025-10-01 04:33:47.356524', 'step': 16832, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:47.387176', 'step': 16832, 'epoch': 3} {'type': 'loss', 'content': 0.04128739610314369, 'timestamp': '2025-10-01 04:33:47.390043', 'step': 16833, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:47.430090', 'step': 16833, 'epoch': 3} {'type': 'loss', 'content': 0.06326176971197128, 'timestamp': '2025-10-01 04:33:47.433352', 'step': 16834, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:33:47.464081', 'step': 16834, 'epoch': 3} {'type': 'loss', 'content': 0.11895912885665894, 'timestamp': '2025-10-01 04:33:47.466950', 'step': 16835, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:33:47.499264', 'step': 16835, 'epoch': 3} {'type': 'loss', 'content': 0.09347499907016754, 'timestamp': '2025-10-01 04:33:47.523503', 'step': 16836, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:47.555895', 'step': 16836, 'epoch': 3} {'type': 'loss', 'content': 0.09172996878623962, 'timestamp': '2025-10-01 04:33:47.558609', 'step': 16837, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:33:47.590166', 'step': 16837, 'epoch': 3} {'type': 'loss', 'content': 0.09293108433485031, 'timestamp': '2025-10-01 04:33:47.592707', 'step': 16838, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:47.623603', 'step': 16838, 'epoch': 3} {'type': 'loss', 'content': 0.05978558212518692, 'timestamp': '2025-10-01 04:33:47.625841', 'step': 16839, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:47.656159', 'step': 16839, 'epoch': 3} {'type': 'loss', 'content': 0.04434497281908989, 'timestamp': '2025-10-01 04:33:47.679846', 'step': 16840, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:47.710485', 'step': 16840, 'epoch': 3} {'type': 'loss', 'content': 0.1027289628982544, 'timestamp': '2025-10-01 04:33:47.712663', 'step': 16841, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:47.743478', 'step': 16841, 'epoch': 3} {'type': 'loss', 'content': 0.037285216152668, 'timestamp': '2025-10-01 04:33:47.745893', 'step': 16842, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:47.776732', 'step': 16842, 'epoch': 3} {'type': 'loss', 'content': 0.04981416091322899, 'timestamp': '2025-10-01 04:33:47.778780', 'step': 16843, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:47.809418', 'step': 16843, 'epoch': 3} {'type': 'loss', 'content': 0.11197302490472794, 'timestamp': '2025-10-01 04:33:47.833035', 'step': 16844, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:47.864584', 'step': 16844, 'epoch': 3} {'type': 'loss', 'content': 0.1283813863992691, 'timestamp': '2025-10-01 04:33:47.866915', 'step': 16845, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:33:47.897690', 'step': 16845, 'epoch': 3} {'type': 'loss', 'content': 0.10247389227151871, 'timestamp': '2025-10-01 04:33:47.900038', 'step': 16846, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:33:47.930710', 'step': 16846, 'epoch': 3} {'type': 'loss', 'content': 0.023431949317455292, 'timestamp': '2025-10-01 04:33:47.933551', 'step': 16847, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:47.963894', 'step': 16847, 'epoch': 3} {'type': 'loss', 'content': 0.05551407113671303, 'timestamp': '2025-10-01 04:33:47.988005', 'step': 16848, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:48.018659', 'step': 16848, 'epoch': 3} {'type': 'loss', 'content': 0.04999925568699837, 'timestamp': '2025-10-01 04:33:48.021000', 'step': 16849, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:48.050781', 'step': 16849, 'epoch': 3} {'type': 'loss', 'content': 0.02633032761514187, 'timestamp': '2025-10-01 04:33:48.053748', 'step': 16850, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:33:48.083677', 'step': 16850, 'epoch': 3} {'type': 'loss', 'content': 0.06991881877183914, 'timestamp': '2025-10-01 04:33:48.086000', 'step': 16851, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:48.116290', 'step': 16851, 'epoch': 3} {'type': 'loss', 'content': 0.06351598352193832, 'timestamp': '2025-10-01 04:33:48.140009', 'step': 16852, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:48.169861', 'step': 16852, 'epoch': 3} {'type': 'loss', 'content': 0.09672075510025024, 'timestamp': '2025-10-01 04:33:48.172043', 'step': 16853, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:48.203036', 'step': 16853, 'epoch': 3} {'type': 'loss', 'content': 0.051882993429899216, 'timestamp': '2025-10-01 04:33:48.205385', 'step': 16854, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:48.235687', 'step': 16854, 'epoch': 3} {'type': 'loss', 'content': 0.07264883071184158, 'timestamp': '2025-10-01 04:33:48.237866', 'step': 16855, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:48.268976', 'step': 16855, 'epoch': 3} {'type': 'loss', 'content': 0.13069948554039001, 'timestamp': '2025-10-01 04:33:48.292749', 'step': 16856, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:33:48.323478', 'step': 16856, 'epoch': 3} {'type': 'loss', 'content': 0.031294189393520355, 'timestamp': '2025-10-01 04:33:48.325879', 'step': 16857, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:48.355930', 'step': 16857, 'epoch': 3} {'type': 'loss', 'content': 0.09643266350030899, 'timestamp': '2025-10-01 04:33:48.358197', 'step': 16858, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:48.388505', 'step': 16858, 'epoch': 3} {'type': 'loss', 'content': 0.09786023944616318, 'timestamp': '2025-10-01 04:33:48.390760', 'step': 16859, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:48.422145', 'step': 16859, 'epoch': 3} {'type': 'loss', 'content': 0.07549197971820831, 'timestamp': '2025-10-01 04:33:48.446307', 'step': 16860, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:48.478315', 'step': 16860, 'epoch': 3} {'type': 'loss', 'content': 0.10991209000349045, 'timestamp': '2025-10-01 04:33:48.480507', 'step': 16861, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:48.511419', 'step': 16861, 'epoch': 3} {'type': 'loss', 'content': 0.0642334595322609, 'timestamp': '2025-10-01 04:33:48.513917', 'step': 16862, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:48.546240', 'step': 16862, 'epoch': 3} {'type': 'loss', 'content': 0.05337090045213699, 'timestamp': '2025-10-01 04:33:48.548536', 'step': 16863, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:48.582560', 'step': 16863, 'epoch': 3} {'type': 'loss', 'content': 0.15230485796928406, 'timestamp': '2025-10-01 04:33:48.611755', 'step': 16864, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:48.643510', 'step': 16864, 'epoch': 3} {'type': 'loss', 'content': 0.1274075210094452, 'timestamp': '2025-10-01 04:33:48.645992', 'step': 16865, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:48.676555', 'step': 16865, 'epoch': 3} {'type': 'loss', 'content': 0.07071959972381592, 'timestamp': '2025-10-01 04:33:48.679197', 'step': 16866, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:48.710293', 'step': 16866, 'epoch': 3} {'type': 'loss', 'content': 0.1039661094546318, 'timestamp': '2025-10-01 04:33:48.712528', 'step': 16867, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:48.743570', 'step': 16867, 'epoch': 3} {'type': 'loss', 'content': 0.08737165480852127, 'timestamp': '2025-10-01 04:33:48.768073', 'step': 16868, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:48.799280', 'step': 16868, 'epoch': 3} {'type': 'loss', 'content': 0.19364657998085022, 'timestamp': '2025-10-01 04:33:48.801934', 'step': 16869, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:48.833428', 'step': 16869, 'epoch': 3} {'type': 'loss', 'content': 0.05258416756987572, 'timestamp': '2025-10-01 04:33:48.836266', 'step': 16870, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:48.867912', 'step': 16870, 'epoch': 3} {'type': 'loss', 'content': 0.09859557449817657, 'timestamp': '2025-10-01 04:33:48.875898', 'step': 16871, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:48.921855', 'step': 16871, 'epoch': 3} {'type': 'loss', 'content': 0.045338910073041916, 'timestamp': '2025-10-01 04:33:48.945964', 'step': 16872, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:33:48.977450', 'step': 16872, 'epoch': 3} {'type': 'loss', 'content': 0.06228601932525635, 'timestamp': '2025-10-01 04:33:48.979631', 'step': 16873, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:49.011579', 'step': 16873, 'epoch': 3} {'type': 'loss', 'content': 0.04887384548783302, 'timestamp': '2025-10-01 04:33:49.014396', 'step': 16874, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:49.048882', 'step': 16874, 'epoch': 3} {'type': 'loss', 'content': 0.09095342457294464, 'timestamp': '2025-10-01 04:33:49.052911', 'step': 16875, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:49.084679', 'step': 16875, 'epoch': 3} {'type': 'loss', 'content': 0.03687768429517746, 'timestamp': '2025-10-01 04:33:49.109133', 'step': 16876, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:49.141750', 'step': 16876, 'epoch': 3} {'type': 'loss', 'content': 0.08543282002210617, 'timestamp': '2025-10-01 04:33:49.144526', 'step': 16877, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:49.176399', 'step': 16877, 'epoch': 3} {'type': 'loss', 'content': 0.06323505938053131, 'timestamp': '2025-10-01 04:33:49.179180', 'step': 16878, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:49.211785', 'step': 16878, 'epoch': 3} {'type': 'loss', 'content': 0.0390031673014164, 'timestamp': '2025-10-01 04:33:49.214591', 'step': 16879, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:49.246145', 'step': 16879, 'epoch': 3} {'type': 'loss', 'content': 0.07833698391914368, 'timestamp': '2025-10-01 04:33:49.270262', 'step': 16880, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:49.302053', 'step': 16880, 'epoch': 3} {'type': 'loss', 'content': 0.04058043286204338, 'timestamp': '2025-10-01 04:33:49.304836', 'step': 16881, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:33:49.336920', 'step': 16881, 'epoch': 3} {'type': 'loss', 'content': 0.05005998536944389, 'timestamp': '2025-10-01 04:33:49.345100', 'step': 16882, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:49.376552', 'step': 16882, 'epoch': 3} {'type': 'loss', 'content': 0.0478961281478405, 'timestamp': '2025-10-01 04:33:49.380855', 'step': 16883, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:49.413320', 'step': 16883, 'epoch': 3} {'type': 'loss', 'content': 0.08752467483282089, 'timestamp': '2025-10-01 04:33:49.437932', 'step': 16884, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:49.470672', 'step': 16884, 'epoch': 3} {'type': 'loss', 'content': 0.024948393926024437, 'timestamp': '2025-10-01 04:33:49.473074', 'step': 16885, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:49.504062', 'step': 16885, 'epoch': 3} {'type': 'loss', 'content': 0.08699746429920197, 'timestamp': '2025-10-01 04:33:49.507114', 'step': 16886, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:49.538516', 'step': 16886, 'epoch': 3} {'type': 'loss', 'content': 0.13463237881660461, 'timestamp': '2025-10-01 04:33:49.541394', 'step': 16887, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:49.572276', 'step': 16887, 'epoch': 3} {'type': 'loss', 'content': 0.11751004308462143, 'timestamp': '2025-10-01 04:33:49.596273', 'step': 16888, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:33:49.627935', 'step': 16888, 'epoch': 3} {'type': 'loss', 'content': 0.06301536411046982, 'timestamp': '2025-10-01 04:33:49.630548', 'step': 16889, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:49.663149', 'step': 16889, 'epoch': 3} {'type': 'loss', 'content': 0.06959683448076248, 'timestamp': '2025-10-01 04:33:49.665295', 'step': 16890, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:49.697100', 'step': 16890, 'epoch': 3} {'type': 'loss', 'content': 0.0678606703877449, 'timestamp': '2025-10-01 04:33:49.700609', 'step': 16891, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:49.733635', 'step': 16891, 'epoch': 3} {'type': 'loss', 'content': 0.07162479311227798, 'timestamp': '2025-10-01 04:33:49.763010', 'step': 16892, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:49.796543', 'step': 16892, 'epoch': 3} {'type': 'loss', 'content': 0.04697607085108757, 'timestamp': '2025-10-01 04:33:49.799349', 'step': 16893, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:49.831508', 'step': 16893, 'epoch': 3} {'type': 'loss', 'content': 0.046552855521440506, 'timestamp': '2025-10-01 04:33:49.837966', 'step': 16894, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:49.876962', 'step': 16894, 'epoch': 3} {'type': 'loss', 'content': 0.06452728062868118, 'timestamp': '2025-10-01 04:33:49.881180', 'step': 16895, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:33:49.915737', 'step': 16895, 'epoch': 3} {'type': 'loss', 'content': 0.05961444601416588, 'timestamp': '2025-10-01 04:33:49.939860', 'step': 16896, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:49.978307', 'step': 16896, 'epoch': 3} {'type': 'loss', 'content': 0.03008667193353176, 'timestamp': '2025-10-01 04:33:49.981931', 'step': 16897, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:50.016377', 'step': 16897, 'epoch': 3} {'type': 'loss', 'content': 0.06923482567071915, 'timestamp': '2025-10-01 04:33:50.031555', 'step': 16898, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:50.064203', 'step': 16898, 'epoch': 3} {'type': 'loss', 'content': 0.06266800314188004, 'timestamp': '2025-10-01 04:33:50.067057', 'step': 16899, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:50.099449', 'step': 16899, 'epoch': 3} {'type': 'loss', 'content': 0.05075191333889961, 'timestamp': '2025-10-01 04:33:50.124243', 'step': 16900, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:50.156991', 'step': 16900, 'epoch': 3} {'type': 'loss', 'content': 0.07903704792261124, 'timestamp': '2025-10-01 04:33:50.159530', 'step': 16901, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:33:50.194324', 'step': 16901, 'epoch': 3} {'type': 'loss', 'content': 0.141584113240242, 'timestamp': '2025-10-01 04:33:50.197060', 'step': 16902, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:50.229801', 'step': 16902, 'epoch': 3} {'type': 'loss', 'content': 0.032552335411310196, 'timestamp': '2025-10-01 04:33:50.240345', 'step': 16903, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:50.273206', 'step': 16903, 'epoch': 3} {'type': 'loss', 'content': 0.05533801391720772, 'timestamp': '2025-10-01 04:33:50.306267', 'step': 16904, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:50.339473', 'step': 16904, 'epoch': 3} {'type': 'loss', 'content': 0.08632293343544006, 'timestamp': '2025-10-01 04:33:50.342352', 'step': 16905, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:50.377389', 'step': 16905, 'epoch': 3} {'type': 'loss', 'content': 0.04998089000582695, 'timestamp': '2025-10-01 04:33:50.380580', 'step': 16906, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:50.414395', 'step': 16906, 'epoch': 3} {'type': 'loss', 'content': 0.01843082904815674, 'timestamp': '2025-10-01 04:33:50.419263', 'step': 16907, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:50.452320', 'step': 16907, 'epoch': 3} {'type': 'loss', 'content': 0.07445588707923889, 'timestamp': '2025-10-01 04:33:50.476261', 'step': 16908, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:50.508020', 'step': 16908, 'epoch': 3} {'type': 'loss', 'content': 0.0810670256614685, 'timestamp': '2025-10-01 04:33:50.518122', 'step': 16909, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:50.550361', 'step': 16909, 'epoch': 3} {'type': 'loss', 'content': 0.02701524831354618, 'timestamp': '2025-10-01 04:33:50.552650', 'step': 16910, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:33:50.584915', 'step': 16910, 'epoch': 3} {'type': 'loss', 'content': 0.05954764038324356, 'timestamp': '2025-10-01 04:33:50.587842', 'step': 16911, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:50.620565', 'step': 16911, 'epoch': 3} {'type': 'loss', 'content': 0.025577638298273087, 'timestamp': '2025-10-01 04:33:50.653706', 'step': 16912, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:50.692792', 'step': 16912, 'epoch': 3} {'type': 'loss', 'content': 0.023137524724006653, 'timestamp': '2025-10-01 04:33:50.694953', 'step': 16913, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:50.725172', 'step': 16913, 'epoch': 3} {'type': 'loss', 'content': 0.057214729487895966, 'timestamp': '2025-10-01 04:33:50.727364', 'step': 16914, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:50.758827', 'step': 16914, 'epoch': 3} {'type': 'loss', 'content': 0.09495700150728226, 'timestamp': '2025-10-01 04:33:50.763893', 'step': 16915, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:50.794790', 'step': 16915, 'epoch': 3} {'type': 'loss', 'content': 0.0679224506020546, 'timestamp': '2025-10-01 04:33:50.818494', 'step': 16916, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:33:50.849327', 'step': 16916, 'epoch': 3} {'type': 'loss', 'content': 0.1319677233695984, 'timestamp': '2025-10-01 04:33:50.851570', 'step': 16917, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:50.882411', 'step': 16917, 'epoch': 3} {'type': 'loss', 'content': 0.033451031893491745, 'timestamp': '2025-10-01 04:33:50.884678', 'step': 16918, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:50.915711', 'step': 16918, 'epoch': 3} {'type': 'loss', 'content': 0.025527575984597206, 'timestamp': '2025-10-01 04:33:50.918298', 'step': 16919, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:50.949971', 'step': 16919, 'epoch': 3} {'type': 'loss', 'content': 0.04243696108460426, 'timestamp': '2025-10-01 04:33:50.973903', 'step': 16920, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:51.004423', 'step': 16920, 'epoch': 3} {'type': 'loss', 'content': 0.01966588944196701, 'timestamp': '2025-10-01 04:33:51.006653', 'step': 16921, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:51.036811', 'step': 16921, 'epoch': 3} {'type': 'loss', 'content': 0.0668783113360405, 'timestamp': '2025-10-01 04:33:51.039002', 'step': 16922, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:51.070394', 'step': 16922, 'epoch': 3} {'type': 'loss', 'content': 0.07902960479259491, 'timestamp': '2025-10-01 04:33:51.073452', 'step': 16923, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:33:51.105309', 'step': 16923, 'epoch': 3} {'type': 'loss', 'content': 0.05293948948383331, 'timestamp': '2025-10-01 04:33:51.133518', 'step': 16924, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:51.163546', 'step': 16924, 'epoch': 3} {'type': 'loss', 'content': 0.027916913852095604, 'timestamp': '2025-10-01 04:33:51.165803', 'step': 16925, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:51.196716', 'step': 16925, 'epoch': 3} {'type': 'loss', 'content': 0.04313904047012329, 'timestamp': '2025-10-01 04:33:51.200484', 'step': 16926, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:33:51.230790', 'step': 16926, 'epoch': 3} {'type': 'loss', 'content': 0.05684078112244606, 'timestamp': '2025-10-01 04:33:51.232911', 'step': 16927, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:33:51.263434', 'step': 16927, 'epoch': 3} {'type': 'loss', 'content': 0.10133100301027298, 'timestamp': '2025-10-01 04:33:51.287047', 'step': 16928, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:51.320056', 'step': 16928, 'epoch': 3} {'type': 'loss', 'content': 0.03790470212697983, 'timestamp': '2025-10-01 04:33:51.322273', 'step': 16929, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:51.353753', 'step': 16929, 'epoch': 3} {'type': 'loss', 'content': 0.10600432008504868, 'timestamp': '2025-10-01 04:33:51.357030', 'step': 16930, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:51.387362', 'step': 16930, 'epoch': 3} {'type': 'loss', 'content': 0.029177071526646614, 'timestamp': '2025-10-01 04:33:51.389653', 'step': 16931, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:51.420378', 'step': 16931, 'epoch': 3} {'type': 'loss', 'content': 0.07560232281684875, 'timestamp': '2025-10-01 04:33:51.444212', 'step': 16932, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:33:51.475188', 'step': 16932, 'epoch': 3} {'type': 'loss', 'content': 0.14729413390159607, 'timestamp': '2025-10-01 04:33:51.477423', 'step': 16933, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:51.510258', 'step': 16933, 'epoch': 3} {'type': 'loss', 'content': 0.04438915476202965, 'timestamp': '2025-10-01 04:33:51.512892', 'step': 16934, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:51.543501', 'step': 16934, 'epoch': 3} {'type': 'loss', 'content': 0.049370765686035156, 'timestamp': '2025-10-01 04:33:51.545799', 'step': 16935, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:51.580241', 'step': 16935, 'epoch': 3} {'type': 'loss', 'content': 0.024708416312932968, 'timestamp': '2025-10-01 04:33:51.603819', 'step': 16936, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:51.633856', 'step': 16936, 'epoch': 3} {'type': 'loss', 'content': 0.051310695707798004, 'timestamp': '2025-10-01 04:33:51.635985', 'step': 16937, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:33:51.665975', 'step': 16937, 'epoch': 3} {'type': 'loss', 'content': 0.08494524657726288, 'timestamp': '2025-10-01 04:33:51.671141', 'step': 16938, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:51.702025', 'step': 16938, 'epoch': 3} {'type': 'loss', 'content': 0.02866174653172493, 'timestamp': '2025-10-01 04:33:51.704225', 'step': 16939, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:51.735477', 'step': 16939, 'epoch': 3} {'type': 'loss', 'content': 0.12796063721179962, 'timestamp': '2025-10-01 04:33:51.761553', 'step': 16940, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:51.791873', 'step': 16940, 'epoch': 3} {'type': 'loss', 'content': 0.04951286315917969, 'timestamp': '2025-10-01 04:33:51.794519', 'step': 16941, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:51.824712', 'step': 16941, 'epoch': 3} {'type': 'loss', 'content': 0.09421861171722412, 'timestamp': '2025-10-01 04:33:51.848752', 'step': 16942, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:51.893856', 'step': 16942, 'epoch': 3} {'type': 'loss', 'content': 0.10047167539596558, 'timestamp': '2025-10-01 04:33:51.896373', 'step': 16943, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:51.929222', 'step': 16943, 'epoch': 3} {'type': 'loss', 'content': 0.08139217644929886, 'timestamp': '2025-10-01 04:33:51.953675', 'step': 16944, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:33:51.984031', 'step': 16944, 'epoch': 3} {'type': 'loss', 'content': 0.1129063218832016, 'timestamp': '2025-10-01 04:33:51.986214', 'step': 16945, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:33:52.016609', 'step': 16945, 'epoch': 3} {'type': 'loss', 'content': 0.07510250061750412, 'timestamp': '2025-10-01 04:33:52.018781', 'step': 16946, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:52.049806', 'step': 16946, 'epoch': 3} {'type': 'loss', 'content': 0.049013737589120865, 'timestamp': '2025-10-01 04:33:52.052308', 'step': 16947, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:52.082539', 'step': 16947, 'epoch': 3} {'type': 'loss', 'content': 0.03803905472159386, 'timestamp': '2025-10-01 04:33:52.106185', 'step': 16948, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:52.136523', 'step': 16948, 'epoch': 3} {'type': 'loss', 'content': 0.048670098185539246, 'timestamp': '2025-10-01 04:33:52.138948', 'step': 16949, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:52.168880', 'step': 16949, 'epoch': 3} {'type': 'loss', 'content': 0.05010144039988518, 'timestamp': '2025-10-01 04:33:52.171194', 'step': 16950, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:52.203662', 'step': 16950, 'epoch': 3} {'type': 'loss', 'content': 0.007839364930987358, 'timestamp': '2025-10-01 04:33:52.205961', 'step': 16951, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:52.235751', 'step': 16951, 'epoch': 3} {'type': 'loss', 'content': 0.041080277413129807, 'timestamp': '2025-10-01 04:33:52.259319', 'step': 16952, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:52.290909', 'step': 16952, 'epoch': 3} {'type': 'loss', 'content': 0.08146855235099792, 'timestamp': '2025-10-01 04:33:52.294402', 'step': 16953, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:52.332348', 'step': 16953, 'epoch': 3} {'type': 'loss', 'content': 0.0923813059926033, 'timestamp': '2025-10-01 04:33:52.334479', 'step': 16954, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:52.364846', 'step': 16954, 'epoch': 3} {'type': 'loss', 'content': 0.061680082231760025, 'timestamp': '2025-10-01 04:33:52.367013', 'step': 16955, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:52.399291', 'step': 16955, 'epoch': 3} {'type': 'loss', 'content': 0.10859327763319016, 'timestamp': '2025-10-01 04:33:52.425927', 'step': 16956, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:52.456476', 'step': 16956, 'epoch': 3} {'type': 'loss', 'content': 0.055288687348365784, 'timestamp': '2025-10-01 04:33:52.468854', 'step': 16957, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:52.500002', 'step': 16957, 'epoch': 3} {'type': 'loss', 'content': 0.08073637634515762, 'timestamp': '2025-10-01 04:33:52.502303', 'step': 16958, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:52.533852', 'step': 16958, 'epoch': 3} {'type': 'loss', 'content': 0.06900738179683685, 'timestamp': '2025-10-01 04:33:52.550448', 'step': 16959, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:52.581302', 'step': 16959, 'epoch': 3} {'type': 'loss', 'content': 0.07450421154499054, 'timestamp': '2025-10-01 04:33:52.605013', 'step': 16960, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:52.635363', 'step': 16960, 'epoch': 3} {'type': 'loss', 'content': 0.11639731377363205, 'timestamp': '2025-10-01 04:33:52.638821', 'step': 16961, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:52.671120', 'step': 16961, 'epoch': 3} {'type': 'loss', 'content': 0.047364529222249985, 'timestamp': '2025-10-01 04:33:52.673752', 'step': 16962, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:52.704424', 'step': 16962, 'epoch': 3} {'type': 'loss', 'content': 0.06668085604906082, 'timestamp': '2025-10-01 04:33:52.707317', 'step': 16963, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:33:52.738014', 'step': 16963, 'epoch': 3} {'type': 'loss', 'content': 0.039866555482149124, 'timestamp': '2025-10-01 04:33:52.761850', 'step': 16964, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:33:52.793351', 'step': 16964, 'epoch': 3} {'type': 'loss', 'content': 0.08058691024780273, 'timestamp': '2025-10-01 04:33:52.795693', 'step': 16965, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:52.827611', 'step': 16965, 'epoch': 3} {'type': 'loss', 'content': 0.013562498614192009, 'timestamp': '2025-10-01 04:33:52.829895', 'step': 16966, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:52.860379', 'step': 16966, 'epoch': 3} {'type': 'loss', 'content': 0.07064268738031387, 'timestamp': '2025-10-01 04:33:52.866637', 'step': 16967, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:52.896679', 'step': 16967, 'epoch': 3} {'type': 'loss', 'content': 0.07041109353303909, 'timestamp': '2025-10-01 04:33:52.920365', 'step': 16968, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:52.951068', 'step': 16968, 'epoch': 3} {'type': 'loss', 'content': 0.0709974467754364, 'timestamp': '2025-10-01 04:33:52.953374', 'step': 16969, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:52.984725', 'step': 16969, 'epoch': 3} {'type': 'loss', 'content': 0.08183267712593079, 'timestamp': '2025-10-01 04:33:52.986933', 'step': 16970, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:53.017498', 'step': 16970, 'epoch': 3} {'type': 'loss', 'content': 0.05620431900024414, 'timestamp': '2025-10-01 04:33:53.025411', 'step': 16971, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:33:53.056684', 'step': 16971, 'epoch': 3} {'type': 'loss', 'content': 0.04402931034564972, 'timestamp': '2025-10-01 04:33:53.080536', 'step': 16972, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:53.110998', 'step': 16972, 'epoch': 3} {'type': 'loss', 'content': 0.1699054092168808, 'timestamp': '2025-10-01 04:33:53.113102', 'step': 16973, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:33:53.144842', 'step': 16973, 'epoch': 3} {'type': 'loss', 'content': 0.06122533977031708, 'timestamp': '2025-10-01 04:33:53.147209', 'step': 16974, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:53.177793', 'step': 16974, 'epoch': 3} {'type': 'loss', 'content': 0.05225779488682747, 'timestamp': '2025-10-01 04:33:53.200981', 'step': 16975, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:53.233434', 'step': 16975, 'epoch': 3} {'type': 'loss', 'content': 0.05405433848500252, 'timestamp': '2025-10-01 04:33:53.259387', 'step': 16976, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:53.303046', 'step': 16976, 'epoch': 3} {'type': 'loss', 'content': 0.02497721090912819, 'timestamp': '2025-10-01 04:33:53.305124', 'step': 16977, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:33:53.335947', 'step': 16977, 'epoch': 3} {'type': 'loss', 'content': 0.14394250512123108, 'timestamp': '2025-10-01 04:33:53.338473', 'step': 16978, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:53.368825', 'step': 16978, 'epoch': 3} {'type': 'loss', 'content': 0.033952049911022186, 'timestamp': '2025-10-01 04:33:53.371385', 'step': 16979, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:53.402902', 'step': 16979, 'epoch': 3} {'type': 'loss', 'content': 0.09800957888364792, 'timestamp': '2025-10-01 04:33:53.427099', 'step': 16980, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:53.476212', 'step': 16980, 'epoch': 3} {'type': 'loss', 'content': 0.05332301929593086, 'timestamp': '2025-10-01 04:33:53.482773', 'step': 16981, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:53.513064', 'step': 16981, 'epoch': 3} {'type': 'loss', 'content': 0.07813861966133118, 'timestamp': '2025-10-01 04:33:53.515190', 'step': 16982, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:53.545223', 'step': 16982, 'epoch': 3} {'type': 'loss', 'content': 0.09248625487089157, 'timestamp': '2025-10-01 04:33:53.547587', 'step': 16983, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:53.577992', 'step': 16983, 'epoch': 3} {'type': 'loss', 'content': 0.12050686776638031, 'timestamp': '2025-10-01 04:33:53.602663', 'step': 16984, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:33:53.633525', 'step': 16984, 'epoch': 3} {'type': 'loss', 'content': 0.10589902848005295, 'timestamp': '2025-10-01 04:33:53.635873', 'step': 16985, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:33:53.666335', 'step': 16985, 'epoch': 3} {'type': 'loss', 'content': 0.04126554727554321, 'timestamp': '2025-10-01 04:33:53.668755', 'step': 16986, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:53.700039', 'step': 16986, 'epoch': 3} {'type': 'loss', 'content': 0.08291161805391312, 'timestamp': '2025-10-01 04:33:53.702070', 'step': 16987, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:53.734180', 'step': 16987, 'epoch': 3} {'type': 'loss', 'content': 0.07282661646604538, 'timestamp': '2025-10-01 04:33:53.757991', 'step': 16988, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:53.793984', 'step': 16988, 'epoch': 3} {'type': 'loss', 'content': 0.059610553085803986, 'timestamp': '2025-10-01 04:33:53.796050', 'step': 16989, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:53.827036', 'step': 16989, 'epoch': 3} {'type': 'loss', 'content': 0.062150198966264725, 'timestamp': '2025-10-01 04:33:53.829234', 'step': 16990, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:53.860384', 'step': 16990, 'epoch': 3} {'type': 'loss', 'content': 0.07836554944515228, 'timestamp': '2025-10-01 04:33:53.862589', 'step': 16991, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:53.893889', 'step': 16991, 'epoch': 3} {'type': 'loss', 'content': 0.07585154473781586, 'timestamp': '2025-10-01 04:33:53.917571', 'step': 16992, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:53.947797', 'step': 16992, 'epoch': 3} {'type': 'loss', 'content': 0.08458933234214783, 'timestamp': '2025-10-01 04:33:53.956743', 'step': 16993, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:33:53.987383', 'step': 16993, 'epoch': 3} {'type': 'loss', 'content': 0.06639424711465836, 'timestamp': '2025-10-01 04:33:53.989736', 'step': 16994, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:54.021057', 'step': 16994, 'epoch': 3} {'type': 'loss', 'content': 0.05134262889623642, 'timestamp': '2025-10-01 04:33:54.023157', 'step': 16995, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:54.053906', 'step': 16995, 'epoch': 3} {'type': 'loss', 'content': 0.07097882032394409, 'timestamp': '2025-10-01 04:33:54.077888', 'step': 16996, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:54.115870', 'step': 16996, 'epoch': 3} {'type': 'loss', 'content': 0.05597468093037605, 'timestamp': '2025-10-01 04:33:54.118167', 'step': 16997, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:54.148339', 'step': 16997, 'epoch': 3} {'type': 'loss', 'content': 0.1199541762471199, 'timestamp': '2025-10-01 04:33:54.150635', 'step': 16998, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:33:54.180930', 'step': 16998, 'epoch': 3} {'type': 'loss', 'content': 0.1324852854013443, 'timestamp': '2025-10-01 04:33:54.184813', 'step': 16999, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:54.222267', 'step': 16999, 'epoch': 3} {'type': 'loss', 'content': 0.09336107224225998, 'timestamp': '2025-10-01 04:33:54.255018', 'step': 17000, 'epoch': 3} {'type': 'info', 'content': 'Checkpoint saved at step 17000', 'timestamp': '2025-10-01 04:33:59.255919', 'step': 17000, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:59.301865', 'step': 17000, 'epoch': 3} {'type': 'loss', 'content': 0.06472143530845642, 'timestamp': '2025-10-01 04:33:59.304068', 'step': 17001, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:59.336558', 'step': 17001, 'epoch': 3} {'type': 'loss', 'content': 0.0734262764453888, 'timestamp': '2025-10-01 04:33:59.338804', 'step': 17002, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:59.370121', 'step': 17002, 'epoch': 3} {'type': 'loss', 'content': 0.12061085551977158, 'timestamp': '2025-10-01 04:33:59.372512', 'step': 17003, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:59.404054', 'step': 17003, 'epoch': 3} {'type': 'loss', 'content': 0.05228927358984947, 'timestamp': '2025-10-01 04:33:59.427879', 'step': 17004, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:59.462166', 'step': 17004, 'epoch': 3} {'type': 'loss', 'content': 0.040749210864305496, 'timestamp': '2025-10-01 04:33:59.464506', 'step': 17005, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:59.504136', 'step': 17005, 'epoch': 3} {'type': 'loss', 'content': 0.03149527311325073, 'timestamp': '2025-10-01 04:33:59.506878', 'step': 17006, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:59.539332', 'step': 17006, 'epoch': 3} {'type': 'loss', 'content': 0.06762436032295227, 'timestamp': '2025-10-01 04:33:59.541648', 'step': 17007, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:59.572852', 'step': 17007, 'epoch': 3} {'type': 'loss', 'content': 0.04996337741613388, 'timestamp': '2025-10-01 04:33:59.597132', 'step': 17008, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:59.627929', 'step': 17008, 'epoch': 3} {'type': 'loss', 'content': 0.06839649379253387, 'timestamp': '2025-10-01 04:33:59.631081', 'step': 17009, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:59.666805', 'step': 17009, 'epoch': 3} {'type': 'loss', 'content': 0.014526891522109509, 'timestamp': '2025-10-01 04:33:59.668943', 'step': 17010, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:59.699481', 'step': 17010, 'epoch': 3} {'type': 'loss', 'content': 0.07822171598672867, 'timestamp': '2025-10-01 04:33:59.702003', 'step': 17011, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:59.732481', 'step': 17011, 'epoch': 3} {'type': 'loss', 'content': 0.09053889662027359, 'timestamp': '2025-10-01 04:33:59.757594', 'step': 17012, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:33:59.788469', 'step': 17012, 'epoch': 3} {'type': 'loss', 'content': 0.05636626482009888, 'timestamp': '2025-10-01 04:33:59.790730', 'step': 17013, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:59.822286', 'step': 17013, 'epoch': 3} {'type': 'loss', 'content': 0.1698058396577835, 'timestamp': '2025-10-01 04:33:59.824881', 'step': 17014, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:59.855666', 'step': 17014, 'epoch': 3} {'type': 'loss', 'content': 0.03626102954149246, 'timestamp': '2025-10-01 04:33:59.858062', 'step': 17015, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:33:59.889469', 'step': 17015, 'epoch': 3} {'type': 'loss', 'content': 0.04756065085530281, 'timestamp': '2025-10-01 04:33:59.913186', 'step': 17016, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:59.943907', 'step': 17016, 'epoch': 3} {'type': 'loss', 'content': 0.0968678817152977, 'timestamp': '2025-10-01 04:33:59.946952', 'step': 17017, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:33:59.978159', 'step': 17017, 'epoch': 3} {'type': 'loss', 'content': 0.12771891057491302, 'timestamp': '2025-10-01 04:33:59.980516', 'step': 17018, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:00.010790', 'step': 17018, 'epoch': 3} {'type': 'loss', 'content': 0.08372246474027634, 'timestamp': '2025-10-01 04:34:00.014118', 'step': 17019, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:00.044546', 'step': 17019, 'epoch': 3} {'type': 'loss', 'content': 0.1041441559791565, 'timestamp': '2025-10-01 04:34:00.068923', 'step': 17020, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:00.099203', 'step': 17020, 'epoch': 3} {'type': 'loss', 'content': 0.06279392540454865, 'timestamp': '2025-10-01 04:34:00.101584', 'step': 17021, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:00.132216', 'step': 17021, 'epoch': 3} {'type': 'loss', 'content': 0.11407814174890518, 'timestamp': '2025-10-01 04:34:00.134387', 'step': 17022, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:34:00.165113', 'step': 17022, 'epoch': 3} {'type': 'loss', 'content': 0.06827491521835327, 'timestamp': '2025-10-01 04:34:00.167525', 'step': 17023, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:34:00.198474', 'step': 17023, 'epoch': 3} {'type': 'loss', 'content': 0.03985942155122757, 'timestamp': '2025-10-01 04:34:00.222231', 'step': 17024, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:00.252031', 'step': 17024, 'epoch': 3} {'type': 'loss', 'content': 0.08128298074007034, 'timestamp': '2025-10-01 04:34:00.260622', 'step': 17025, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:00.291147', 'step': 17025, 'epoch': 3} {'type': 'loss', 'content': 0.1494549959897995, 'timestamp': '2025-10-01 04:34:00.293467', 'step': 17026, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:00.323942', 'step': 17026, 'epoch': 3} {'type': 'loss', 'content': 0.1024681106209755, 'timestamp': '2025-10-01 04:34:00.326351', 'step': 17027, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:34:00.357068', 'step': 17027, 'epoch': 3} {'type': 'loss', 'content': 0.057149361819028854, 'timestamp': '2025-10-01 04:34:00.381122', 'step': 17028, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:34:00.411534', 'step': 17028, 'epoch': 3} {'type': 'loss', 'content': 0.06714057177305222, 'timestamp': '2025-10-01 04:34:00.413698', 'step': 17029, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:00.445103', 'step': 17029, 'epoch': 3} {'type': 'loss', 'content': 0.0394384004175663, 'timestamp': '2025-10-01 04:34:00.447292', 'step': 17030, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:00.478134', 'step': 17030, 'epoch': 3} {'type': 'loss', 'content': 0.11850343644618988, 'timestamp': '2025-10-01 04:34:00.480292', 'step': 17031, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:34:00.514043', 'step': 17031, 'epoch': 3} {'type': 'loss', 'content': 0.0744391605257988, 'timestamp': '2025-10-01 04:34:00.540163', 'step': 17032, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:00.571218', 'step': 17032, 'epoch': 3} {'type': 'loss', 'content': 0.08220303803682327, 'timestamp': '2025-10-01 04:34:00.573408', 'step': 17033, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:00.606965', 'step': 17033, 'epoch': 3} {'type': 'loss', 'content': 0.10025165230035782, 'timestamp': '2025-10-01 04:34:00.609112', 'step': 17034, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:34:00.639390', 'step': 17034, 'epoch': 3} {'type': 'loss', 'content': 0.11162425577640533, 'timestamp': '2025-10-01 04:34:00.642129', 'step': 17035, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:34:00.673236', 'step': 17035, 'epoch': 3} {'type': 'loss', 'content': 0.10599198937416077, 'timestamp': '2025-10-01 04:34:00.697194', 'step': 17036, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:34:00.729967', 'step': 17036, 'epoch': 3} {'type': 'loss', 'content': 0.08205053210258484, 'timestamp': '2025-10-01 04:34:00.734863', 'step': 17037, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:00.765534', 'step': 17037, 'epoch': 3} {'type': 'loss', 'content': 0.09921412914991379, 'timestamp': '2025-10-01 04:34:00.769819', 'step': 17038, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:00.800829', 'step': 17038, 'epoch': 3} {'type': 'loss', 'content': 0.03978772088885307, 'timestamp': '2025-10-01 04:34:00.803028', 'step': 17039, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:00.833189', 'step': 17039, 'epoch': 3} {'type': 'loss', 'content': 0.08133618533611298, 'timestamp': '2025-10-01 04:34:00.856920', 'step': 17040, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:00.887562', 'step': 17040, 'epoch': 3} {'type': 'loss', 'content': 0.03066038340330124, 'timestamp': '2025-10-01 04:34:00.893150', 'step': 17041, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:34:00.923565', 'step': 17041, 'epoch': 3} {'type': 'loss', 'content': 0.04826982691884041, 'timestamp': '2025-10-01 04:34:00.927976', 'step': 17042, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:00.958432', 'step': 17042, 'epoch': 3} {'type': 'loss', 'content': 0.0808011144399643, 'timestamp': '2025-10-01 04:34:00.960644', 'step': 17043, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:34:00.992596', 'step': 17043, 'epoch': 3} {'type': 'loss', 'content': 0.03469974175095558, 'timestamp': '2025-10-01 04:34:01.016311', 'step': 17044, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:34:01.046756', 'step': 17044, 'epoch': 3} {'type': 'loss', 'content': 0.06312434375286102, 'timestamp': '2025-10-01 04:34:01.048957', 'step': 17045, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:01.079030', 'step': 17045, 'epoch': 3} {'type': 'loss', 'content': 0.08777644485235214, 'timestamp': '2025-10-01 04:34:01.081252', 'step': 17046, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:01.112449', 'step': 17046, 'epoch': 3} {'type': 'loss', 'content': 0.07768832892179489, 'timestamp': '2025-10-01 04:34:01.114815', 'step': 17047, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:01.144888', 'step': 17047, 'epoch': 3} {'type': 'loss', 'content': 0.17927294969558716, 'timestamp': '2025-10-01 04:34:01.173115', 'step': 17048, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:34:01.213069', 'step': 17048, 'epoch': 3} {'type': 'loss', 'content': 0.06999567896127701, 'timestamp': '2025-10-01 04:34:01.215938', 'step': 17049, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:01.249265', 'step': 17049, 'epoch': 3} {'type': 'loss', 'content': 0.052566636353731155, 'timestamp': '2025-10-01 04:34:01.252993', 'step': 17050, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:01.287098', 'step': 17050, 'epoch': 3} {'type': 'loss', 'content': 0.06046302989125252, 'timestamp': '2025-10-01 04:34:01.294448', 'step': 17051, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:01.325855', 'step': 17051, 'epoch': 3} {'type': 'loss', 'content': 0.15438713133335114, 'timestamp': '2025-10-01 04:34:01.349663', 'step': 17052, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:34:01.392801', 'step': 17052, 'epoch': 3} {'type': 'loss', 'content': 0.053537677973508835, 'timestamp': '2025-10-01 04:34:01.396935', 'step': 17053, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:01.445211', 'step': 17053, 'epoch': 3} {'type': 'loss', 'content': 0.05041152611374855, 'timestamp': '2025-10-01 04:34:01.460048', 'step': 17054, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:34:01.491277', 'step': 17054, 'epoch': 3} {'type': 'loss', 'content': 0.15998545289039612, 'timestamp': '2025-10-01 04:34:01.494145', 'step': 17055, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:01.526228', 'step': 17055, 'epoch': 3} {'type': 'loss', 'content': 0.026233837008476257, 'timestamp': '2025-10-01 04:34:01.552773', 'step': 17056, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:01.586974', 'step': 17056, 'epoch': 3} {'type': 'loss', 'content': 0.08683516085147858, 'timestamp': '2025-10-01 04:34:01.599690', 'step': 17057, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:34:01.639529', 'step': 17057, 'epoch': 3} {'type': 'loss', 'content': 0.14161145687103271, 'timestamp': '2025-10-01 04:34:01.647179', 'step': 17058, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:34:01.679652', 'step': 17058, 'epoch': 3} {'type': 'loss', 'content': 0.06009907275438309, 'timestamp': '2025-10-01 04:34:01.698857', 'step': 17059, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:01.732251', 'step': 17059, 'epoch': 3} {'type': 'loss', 'content': 0.04711184650659561, 'timestamp': '2025-10-01 04:34:01.757507', 'step': 17060, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:01.811026', 'step': 17060, 'epoch': 3} {'type': 'loss', 'content': 0.10688143223524094, 'timestamp': '2025-10-01 04:34:01.823331', 'step': 17061, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:01.859916', 'step': 17061, 'epoch': 3} {'type': 'loss', 'content': 0.0684007927775383, 'timestamp': '2025-10-01 04:34:01.867223', 'step': 17062, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:34:01.898191', 'step': 17062, 'epoch': 3} {'type': 'loss', 'content': 0.06230723112821579, 'timestamp': '2025-10-01 04:34:01.900466', 'step': 17063, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:01.941280', 'step': 17063, 'epoch': 3} {'type': 'loss', 'content': 0.08319282531738281, 'timestamp': '2025-10-01 04:34:01.966932', 'step': 17064, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:34:01.998949', 'step': 17064, 'epoch': 3} {'type': 'loss', 'content': 0.07021833956241608, 'timestamp': '2025-10-01 04:34:02.001558', 'step': 17065, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:02.031926', 'step': 17065, 'epoch': 3} {'type': 'loss', 'content': 0.06517811864614487, 'timestamp': '2025-10-01 04:34:02.041793', 'step': 17066, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:02.075187', 'step': 17066, 'epoch': 3} {'type': 'loss', 'content': 0.09386587888002396, 'timestamp': '2025-10-01 04:34:02.087102', 'step': 17067, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:02.119623', 'step': 17067, 'epoch': 3} {'type': 'loss', 'content': 0.07183368504047394, 'timestamp': '2025-10-01 04:34:02.144854', 'step': 17068, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:02.193024', 'step': 17068, 'epoch': 3} {'type': 'loss', 'content': 0.07062346488237381, 'timestamp': '2025-10-01 04:34:02.209652', 'step': 17069, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:02.241365', 'step': 17069, 'epoch': 3} {'type': 'loss', 'content': 0.0636804848909378, 'timestamp': '2025-10-01 04:34:02.248457', 'step': 17070, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:02.284635', 'step': 17070, 'epoch': 3} {'type': 'loss', 'content': 0.08719270676374435, 'timestamp': '2025-10-01 04:34:02.287087', 'step': 17071, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:34:02.323083', 'step': 17071, 'epoch': 3} {'type': 'loss', 'content': 0.07793737947940826, 'timestamp': '2025-10-01 04:34:02.358780', 'step': 17072, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:34:02.398005', 'step': 17072, 'epoch': 3} {'type': 'loss', 'content': 0.04666539654135704, 'timestamp': '2025-10-01 04:34:02.401623', 'step': 17073, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:34:02.432048', 'step': 17073, 'epoch': 3} {'type': 'loss', 'content': 0.055685706436634064, 'timestamp': '2025-10-01 04:34:02.434361', 'step': 17074, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:34:02.464701', 'step': 17074, 'epoch': 3} {'type': 'loss', 'content': 0.08333234488964081, 'timestamp': '2025-10-01 04:34:02.466992', 'step': 17075, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:02.498285', 'step': 17075, 'epoch': 3} {'type': 'loss', 'content': 0.10592923313379288, 'timestamp': '2025-10-01 04:34:02.521936', 'step': 17076, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:02.552941', 'step': 17076, 'epoch': 3} {'type': 'loss', 'content': 0.0787448137998581, 'timestamp': '2025-10-01 04:34:02.555756', 'step': 17077, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:34:02.585802', 'step': 17077, 'epoch': 3} {'type': 'loss', 'content': 0.045053135603666306, 'timestamp': '2025-10-01 04:34:02.588268', 'step': 17078, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:02.618073', 'step': 17078, 'epoch': 3} {'type': 'loss', 'content': 0.12639658153057098, 'timestamp': '2025-10-01 04:34:02.620385', 'step': 17079, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:02.651602', 'step': 17079, 'epoch': 3} {'type': 'loss', 'content': 0.058558110147714615, 'timestamp': '2025-10-01 04:34:02.675260', 'step': 17080, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:02.705843', 'step': 17080, 'epoch': 3} {'type': 'loss', 'content': 0.08367504924535751, 'timestamp': '2025-10-01 04:34:02.708258', 'step': 17081, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:02.738178', 'step': 17081, 'epoch': 3} {'type': 'loss', 'content': 0.1340799778699875, 'timestamp': '2025-10-01 04:34:02.740359', 'step': 17082, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:34:02.771445', 'step': 17082, 'epoch': 3} {'type': 'loss', 'content': 0.07842133939266205, 'timestamp': '2025-10-01 04:34:02.773799', 'step': 17083, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:02.805718', 'step': 17083, 'epoch': 3} {'type': 'loss', 'content': 0.11662479490041733, 'timestamp': '2025-10-01 04:34:02.829402', 'step': 17084, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:02.859922', 'step': 17084, 'epoch': 3} {'type': 'loss', 'content': 0.10856913775205612, 'timestamp': '2025-10-01 04:34:02.862463', 'step': 17085, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:34:02.892687', 'step': 17085, 'epoch': 3} {'type': 'loss', 'content': 0.03625563904643059, 'timestamp': '2025-10-01 04:34:02.894846', 'step': 17086, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:34:02.925843', 'step': 17086, 'epoch': 3} {'type': 'loss', 'content': 0.0765296071767807, 'timestamp': '2025-10-01 04:34:02.928226', 'step': 17087, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:02.958595', 'step': 17087, 'epoch': 3} {'type': 'loss', 'content': 0.13168483972549438, 'timestamp': '2025-10-01 04:34:02.982208', 'step': 17088, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:03.012966', 'step': 17088, 'epoch': 3} {'type': 'loss', 'content': 0.10916951298713684, 'timestamp': '2025-10-01 04:34:03.015230', 'step': 17089, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:34:03.045712', 'step': 17089, 'epoch': 3} {'type': 'loss', 'content': 0.10351570695638657, 'timestamp': '2025-10-01 04:34:03.048122', 'step': 17090, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:03.078805', 'step': 17090, 'epoch': 3} {'type': 'loss', 'content': 0.0351741649210453, 'timestamp': '2025-10-01 04:34:03.080962', 'step': 17091, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:03.126576', 'step': 17091, 'epoch': 3} {'type': 'loss', 'content': 0.12931035459041595, 'timestamp': '2025-10-01 04:34:03.150204', 'step': 17092, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:03.180673', 'step': 17092, 'epoch': 3} {'type': 'loss', 'content': 0.16028976440429688, 'timestamp': '2025-10-01 04:34:03.183096', 'step': 17093, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:03.213592', 'step': 17093, 'epoch': 3} {'type': 'loss', 'content': 0.0858449935913086, 'timestamp': '2025-10-01 04:34:03.215823', 'step': 17094, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:34:03.245455', 'step': 17094, 'epoch': 3} {'type': 'loss', 'content': 0.1416531652212143, 'timestamp': '2025-10-01 04:34:03.248662', 'step': 17095, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:34:03.279133', 'step': 17095, 'epoch': 3} {'type': 'loss', 'content': 0.10670792311429977, 'timestamp': '2025-10-01 04:34:03.302888', 'step': 17096, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:34:03.333348', 'step': 17096, 'epoch': 3} {'type': 'loss', 'content': 0.07635347545146942, 'timestamp': '2025-10-01 04:34:03.335774', 'step': 17097, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:03.366176', 'step': 17097, 'epoch': 3} {'type': 'loss', 'content': 0.0635414868593216, 'timestamp': '2025-10-01 04:34:03.368860', 'step': 17098, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:03.399321', 'step': 17098, 'epoch': 3} {'type': 'loss', 'content': 0.04466463252902031, 'timestamp': '2025-10-01 04:34:03.402851', 'step': 17099, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:34:03.434114', 'step': 17099, 'epoch': 3} {'type': 'loss', 'content': 0.11401825398206711, 'timestamp': '2025-10-01 04:34:03.464627', 'step': 17100, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:34:03.496069', 'step': 17100, 'epoch': 3} {'type': 'loss', 'content': 0.05531779304146767, 'timestamp': '2025-10-01 04:34:03.498203', 'step': 17101, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:03.528958', 'step': 17101, 'epoch': 3} {'type': 'loss', 'content': 0.1483488529920578, 'timestamp': '2025-10-01 04:34:03.536090', 'step': 17102, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:03.567036', 'step': 17102, 'epoch': 3} {'type': 'loss', 'content': 0.03932672739028931, 'timestamp': '2025-10-01 04:34:03.569424', 'step': 17103, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:03.599853', 'step': 17103, 'epoch': 3} {'type': 'loss', 'content': 0.06188278645277023, 'timestamp': '2025-10-01 04:34:03.623517', 'step': 17104, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:03.653583', 'step': 17104, 'epoch': 3} {'type': 'loss', 'content': 0.14032502472400665, 'timestamp': '2025-10-01 04:34:03.655786', 'step': 17105, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:03.686333', 'step': 17105, 'epoch': 3} {'type': 'loss', 'content': 0.11265236139297485, 'timestamp': '2025-10-01 04:34:03.688639', 'step': 17106, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:03.719774', 'step': 17106, 'epoch': 3} {'type': 'loss', 'content': 0.06850588321685791, 'timestamp': '2025-10-01 04:34:03.721990', 'step': 17107, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:03.751731', 'step': 17107, 'epoch': 3} {'type': 'loss', 'content': 0.15867820382118225, 'timestamp': '2025-10-01 04:34:03.775617', 'step': 17108, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:34:03.807964', 'step': 17108, 'epoch': 3} {'type': 'loss', 'content': 0.04637366533279419, 'timestamp': '2025-10-01 04:34:03.810054', 'step': 17109, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:03.840821', 'step': 17109, 'epoch': 3} {'type': 'loss', 'content': 0.0640699565410614, 'timestamp': '2025-10-01 04:34:03.843103', 'step': 17110, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:34:03.874332', 'step': 17110, 'epoch': 3} {'type': 'loss', 'content': 0.040784187614917755, 'timestamp': '2025-10-01 04:34:03.876584', 'step': 17111, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:03.907263', 'step': 17111, 'epoch': 3} {'type': 'loss', 'content': 0.06534972786903381, 'timestamp': '2025-10-01 04:34:03.931252', 'step': 17112, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:03.968697', 'step': 17112, 'epoch': 3} {'type': 'loss', 'content': 0.05288677662611008, 'timestamp': '2025-10-01 04:34:03.970993', 'step': 17113, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:04.003122', 'step': 17113, 'epoch': 3} {'type': 'loss', 'content': 0.08612867444753647, 'timestamp': '2025-10-01 04:34:04.005796', 'step': 17114, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:04.037475', 'step': 17114, 'epoch': 3} {'type': 'loss', 'content': 0.06180791184306145, 'timestamp': '2025-10-01 04:34:04.039668', 'step': 17115, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:34:04.070501', 'step': 17115, 'epoch': 3} {'type': 'loss', 'content': 0.0870867669582367, 'timestamp': '2025-10-01 04:34:04.094170', 'step': 17116, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:04.126145', 'step': 17116, 'epoch': 3} {'type': 'loss', 'content': 0.04762837290763855, 'timestamp': '2025-10-01 04:34:04.128980', 'step': 17117, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:34:04.160015', 'step': 17117, 'epoch': 3} {'type': 'loss', 'content': 0.04883895814418793, 'timestamp': '2025-10-01 04:34:04.162249', 'step': 17118, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:04.193149', 'step': 17118, 'epoch': 3} {'type': 'loss', 'content': 0.07370301336050034, 'timestamp': '2025-10-01 04:34:04.195611', 'step': 17119, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:34:04.225762', 'step': 17119, 'epoch': 3} {'type': 'loss', 'content': 0.05339450016617775, 'timestamp': '2025-10-01 04:34:04.249452', 'step': 17120, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:04.280879', 'step': 17120, 'epoch': 3} {'type': 'loss', 'content': 0.032166749238967896, 'timestamp': '2025-10-01 04:34:04.283347', 'step': 17121, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-10-01 04:34:04.316706', 'step': 17121, 'epoch': 3} {'type': 'loss', 'content': 0.0802140161395073, 'timestamp': '2025-10-01 04:34:04.321550', 'step': 17122, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:04.354805', 'step': 17122, 'epoch': 3} {'type': 'loss', 'content': 0.024261318147182465, 'timestamp': '2025-10-01 04:34:04.357099', 'step': 17123, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:04.392065', 'step': 17123, 'epoch': 3} {'type': 'loss', 'content': 0.037301816046237946, 'timestamp': '2025-10-01 04:34:04.419228', 'step': 17124, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:04.450050', 'step': 17124, 'epoch': 3} {'type': 'loss', 'content': 0.04865895211696625, 'timestamp': '2025-10-01 04:34:04.452243', 'step': 17125, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:34:04.482702', 'step': 17125, 'epoch': 3} {'type': 'loss', 'content': 0.12365289777517319, 'timestamp': '2025-10-01 04:34:04.484896', 'step': 17126, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:34:04.515693', 'step': 17126, 'epoch': 3} {'type': 'loss', 'content': 0.06550189852714539, 'timestamp': '2025-10-01 04:34:04.517876', 'step': 17127, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:34:04.550036', 'step': 17127, 'epoch': 3} {'type': 'loss', 'content': 0.07921497523784637, 'timestamp': '2025-10-01 04:34:04.574049', 'step': 17128, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:04.606053', 'step': 17128, 'epoch': 3} {'type': 'loss', 'content': 0.07469744235277176, 'timestamp': '2025-10-01 04:34:04.608194', 'step': 17129, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:04.638635', 'step': 17129, 'epoch': 3} {'type': 'loss', 'content': 0.10351589322090149, 'timestamp': '2025-10-01 04:34:04.646875', 'step': 17130, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:04.677409', 'step': 17130, 'epoch': 3} {'type': 'loss', 'content': 0.10347086936235428, 'timestamp': '2025-10-01 04:34:04.680337', 'step': 17131, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:04.713552', 'step': 17131, 'epoch': 3} {'type': 'loss', 'content': 0.07745778560638428, 'timestamp': '2025-10-01 04:34:04.737263', 'step': 17132, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:34:04.767542', 'step': 17132, 'epoch': 3} {'type': 'loss', 'content': 0.04087166115641594, 'timestamp': '2025-10-01 04:34:04.769688', 'step': 17133, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:04.803698', 'step': 17133, 'epoch': 3} {'type': 'loss', 'content': 0.05622955411672592, 'timestamp': '2025-10-01 04:34:04.806916', 'step': 17134, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:34:04.851138', 'step': 17134, 'epoch': 3} {'type': 'loss', 'content': 0.08384863287210464, 'timestamp': '2025-10-01 04:34:04.854302', 'step': 17135, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:34:04.886552', 'step': 17135, 'epoch': 3} {'type': 'loss', 'content': 0.1304604560136795, 'timestamp': '2025-10-01 04:34:04.910290', 'step': 17136, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:04.945161', 'step': 17136, 'epoch': 3} {'type': 'loss', 'content': 0.011898443102836609, 'timestamp': '2025-10-01 04:34:04.961942', 'step': 17137, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:34:04.993103', 'step': 17137, 'epoch': 3} {'type': 'loss', 'content': 0.11878567934036255, 'timestamp': '2025-10-01 04:34:05.002993', 'step': 17138, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:05.033555', 'step': 17138, 'epoch': 3} {'type': 'loss', 'content': 0.06826300919055939, 'timestamp': '2025-10-01 04:34:05.035854', 'step': 17139, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:34:05.066505', 'step': 17139, 'epoch': 3} {'type': 'loss', 'content': 0.05214579775929451, 'timestamp': '2025-10-01 04:34:05.091789', 'step': 17140, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:05.122812', 'step': 17140, 'epoch': 3} {'type': 'loss', 'content': 0.04119127616286278, 'timestamp': '2025-10-01 04:34:05.124942', 'step': 17141, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:34:05.154963', 'step': 17141, 'epoch': 3} {'type': 'loss', 'content': 0.04215213283896446, 'timestamp': '2025-10-01 04:34:05.157156', 'step': 17142, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:34:05.196146', 'step': 17142, 'epoch': 3} {'type': 'loss', 'content': 0.1179567351937294, 'timestamp': '2025-10-01 04:34:05.198296', 'step': 17143, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:05.228746', 'step': 17143, 'epoch': 3} {'type': 'loss', 'content': 0.1225820854306221, 'timestamp': '2025-10-01 04:34:05.252549', 'step': 17144, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:05.283965', 'step': 17144, 'epoch': 3} {'type': 'loss', 'content': 0.06309275329113007, 'timestamp': '2025-10-01 04:34:05.286467', 'step': 17145, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:34:05.316978', 'step': 17145, 'epoch': 3} {'type': 'loss', 'content': 0.10970707982778549, 'timestamp': '2025-10-01 04:34:05.319433', 'step': 17146, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:34:05.360205', 'step': 17146, 'epoch': 3} {'type': 'loss', 'content': 0.07393094152212143, 'timestamp': '2025-10-01 04:34:05.362800', 'step': 17147, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:34:05.393329', 'step': 17147, 'epoch': 3} {'type': 'loss', 'content': 0.06419285386800766, 'timestamp': '2025-10-01 04:34:05.418346', 'step': 17148, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:34:05.449471', 'step': 17148, 'epoch': 3} {'type': 'loss', 'content': 0.0634462907910347, 'timestamp': '2025-10-01 04:34:05.451807', 'step': 17149, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:05.489354', 'step': 17149, 'epoch': 3} {'type': 'loss', 'content': 0.1019655242562294, 'timestamp': '2025-10-01 04:34:05.498213', 'step': 17150, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:34:05.532786', 'step': 17150, 'epoch': 3} {'type': 'loss', 'content': 0.07025356590747833, 'timestamp': '2025-10-01 04:34:05.535369', 'step': 17151, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:05.570088', 'step': 17151, 'epoch': 3} {'type': 'loss', 'content': 0.07511606812477112, 'timestamp': '2025-10-01 04:34:05.594165', 'step': 17152, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:34:05.628196', 'step': 17152, 'epoch': 3} {'type': 'loss', 'content': 0.029676001518964767, 'timestamp': '2025-10-01 04:34:05.630953', 'step': 17153, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:05.670627', 'step': 17153, 'epoch': 3} {'type': 'loss', 'content': 0.07228177040815353, 'timestamp': '2025-10-01 04:34:05.673675', 'step': 17154, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:05.705177', 'step': 17154, 'epoch': 3} {'type': 'loss', 'content': 0.08164970576763153, 'timestamp': '2025-10-01 04:34:05.707716', 'step': 17155, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:05.739352', 'step': 17155, 'epoch': 3} {'type': 'loss', 'content': 0.05073237046599388, 'timestamp': '2025-10-01 04:34:05.763097', 'step': 17156, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:34:05.793240', 'step': 17156, 'epoch': 3} {'type': 'loss', 'content': 0.1034964993596077, 'timestamp': '2025-10-01 04:34:05.795983', 'step': 17157, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:05.837401', 'step': 17157, 'epoch': 3} {'type': 'loss', 'content': 0.0715857446193695, 'timestamp': '2025-10-01 04:34:05.839582', 'step': 17158, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:34:05.870729', 'step': 17158, 'epoch': 3} {'type': 'loss', 'content': 0.06320597231388092, 'timestamp': '2025-10-01 04:34:05.873917', 'step': 17159, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:05.906338', 'step': 17159, 'epoch': 3} {'type': 'loss', 'content': 0.026141749694943428, 'timestamp': '2025-10-01 04:34:05.931963', 'step': 17160, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:05.961898', 'step': 17160, 'epoch': 3} {'type': 'loss', 'content': 0.1505853831768036, 'timestamp': '2025-10-01 04:34:05.968529', 'step': 17161, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:34:06.017122', 'step': 17161, 'epoch': 3} {'type': 'loss', 'content': 0.046886689960956573, 'timestamp': '2025-10-01 04:34:06.022043', 'step': 17162, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:34:06.052936', 'step': 17162, 'epoch': 3} {'type': 'loss', 'content': 0.14172662794589996, 'timestamp': '2025-10-01 04:34:06.055197', 'step': 17163, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:34:06.085271', 'step': 17163, 'epoch': 3} {'type': 'loss', 'content': 0.0346013605594635, 'timestamp': '2025-10-01 04:34:06.109362', 'step': 17164, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:34:06.140175', 'step': 17164, 'epoch': 3} {'type': 'loss', 'content': 0.14251628518104553, 'timestamp': '2025-10-01 04:34:06.142833', 'step': 17165, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:34:06.173841', 'step': 17165, 'epoch': 3} {'type': 'loss', 'content': 0.09050318598747253, 'timestamp': '2025-10-01 04:34:06.176595', 'step': 17166, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:34:06.208171', 'step': 17166, 'epoch': 3} {'type': 'loss', 'content': 0.058327555656433105, 'timestamp': '2025-10-01 04:34:06.210560', 'step': 17167, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:34:06.242511', 'step': 17167, 'epoch': 3} {'type': 'loss', 'content': 0.060808394104242325, 'timestamp': '2025-10-01 04:34:06.266595', 'step': 17168, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:06.298318', 'step': 17168, 'epoch': 3} {'type': 'loss', 'content': 0.053340375423431396, 'timestamp': '2025-10-01 04:34:06.300792', 'step': 17169, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:34:06.332178', 'step': 17169, 'epoch': 3} {'type': 'loss', 'content': 0.08309339731931686, 'timestamp': '2025-10-01 04:34:06.334685', 'step': 17170, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:34:06.367236', 'step': 17170, 'epoch': 3} {'type': 'loss', 'content': 0.03160465508699417, 'timestamp': '2025-10-01 04:34:06.370099', 'step': 17171, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:06.401058', 'step': 17171, 'epoch': 3} {'type': 'loss', 'content': 0.0681595429778099, 'timestamp': '2025-10-01 04:34:06.424854', 'step': 17172, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:06.456070', 'step': 17172, 'epoch': 3} {'type': 'loss', 'content': 0.04294119030237198, 'timestamp': '2025-10-01 04:34:06.458486', 'step': 17173, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:06.489759', 'step': 17173, 'epoch': 3} {'type': 'loss', 'content': 0.08311724662780762, 'timestamp': '2025-10-01 04:34:06.492112', 'step': 17174, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:34:06.523525', 'step': 17174, 'epoch': 3} {'type': 'loss', 'content': 0.06771375983953476, 'timestamp': '2025-10-01 04:34:06.528014', 'step': 17175, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:34:06.558319', 'step': 17175, 'epoch': 3} {'type': 'loss', 'content': 0.056151602417230606, 'timestamp': '2025-10-01 04:34:06.582560', 'step': 17176, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:06.614071', 'step': 17176, 'epoch': 3} {'type': 'loss', 'content': 0.1272980123758316, 'timestamp': '2025-10-01 04:34:06.616378', 'step': 17177, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:06.646694', 'step': 17177, 'epoch': 3} {'type': 'loss', 'content': 0.019056737422943115, 'timestamp': '2025-10-01 04:34:06.653119', 'step': 17178, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:34:06.686461', 'step': 17178, 'epoch': 3} {'type': 'loss', 'content': 0.09128385037183762, 'timestamp': '2025-10-01 04:34:06.704153', 'step': 17179, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:34:06.735283', 'step': 17179, 'epoch': 3} {'type': 'loss', 'content': 0.029279500246047974, 'timestamp': '2025-10-01 04:34:06.759471', 'step': 17180, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:06.790992', 'step': 17180, 'epoch': 3} {'type': 'loss', 'content': 0.0946170836687088, 'timestamp': '2025-10-01 04:34:06.793636', 'step': 17181, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:06.825735', 'step': 17181, 'epoch': 3} {'type': 'loss', 'content': 0.08517026901245117, 'timestamp': '2025-10-01 04:34:06.829351', 'step': 17182, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:34:06.860793', 'step': 17182, 'epoch': 3} {'type': 'loss', 'content': 0.05092319846153259, 'timestamp': '2025-10-01 04:34:06.869088', 'step': 17183, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:06.902437', 'step': 17183, 'epoch': 3} {'type': 'loss', 'content': 0.07127948850393295, 'timestamp': '2025-10-01 04:34:06.926169', 'step': 17184, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:34:06.956663', 'step': 17184, 'epoch': 3} {'type': 'loss', 'content': 0.07650277018547058, 'timestamp': '2025-10-01 04:34:06.959418', 'step': 17185, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:06.990502', 'step': 17185, 'epoch': 3} {'type': 'loss', 'content': 0.07938699424266815, 'timestamp': '2025-10-01 04:34:06.993274', 'step': 17186, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:34:07.029033', 'step': 17186, 'epoch': 3} {'type': 'loss', 'content': 0.0840102881193161, 'timestamp': '2025-10-01 04:34:07.031267', 'step': 17187, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:07.062351', 'step': 17187, 'epoch': 3} {'type': 'loss', 'content': 0.0263203252106905, 'timestamp': '2025-10-01 04:34:07.086657', 'step': 17188, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:07.117334', 'step': 17188, 'epoch': 3} {'type': 'loss', 'content': 0.0998668521642685, 'timestamp': '2025-10-01 04:34:07.119545', 'step': 17189, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:07.150788', 'step': 17189, 'epoch': 3} {'type': 'loss', 'content': 0.05241219699382782, 'timestamp': '2025-10-01 04:34:07.153246', 'step': 17190, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:07.184381', 'step': 17190, 'epoch': 3} {'type': 'loss', 'content': 0.09881870448589325, 'timestamp': '2025-10-01 04:34:07.186905', 'step': 17191, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:07.218386', 'step': 17191, 'epoch': 3} {'type': 'loss', 'content': 0.06949819624423981, 'timestamp': '2025-10-01 04:34:07.242113', 'step': 17192, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:34:07.278068', 'step': 17192, 'epoch': 3} {'type': 'loss', 'content': 0.08084842562675476, 'timestamp': '2025-10-01 04:34:07.280255', 'step': 17193, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:07.312463', 'step': 17193, 'epoch': 3} {'type': 'loss', 'content': 0.04533955827355385, 'timestamp': '2025-10-01 04:34:07.317545', 'step': 17194, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:07.348073', 'step': 17194, 'epoch': 3} {'type': 'loss', 'content': 0.0541500560939312, 'timestamp': '2025-10-01 04:34:07.350500', 'step': 17195, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:07.384956', 'step': 17195, 'epoch': 3} {'type': 'loss', 'content': 0.1409469097852707, 'timestamp': '2025-10-01 04:34:07.408615', 'step': 17196, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:07.439315', 'step': 17196, 'epoch': 3} {'type': 'loss', 'content': 0.0439445823431015, 'timestamp': '2025-10-01 04:34:07.441422', 'step': 17197, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:07.471494', 'step': 17197, 'epoch': 3} {'type': 'loss', 'content': 0.05225528031587601, 'timestamp': '2025-10-01 04:34:07.473980', 'step': 17198, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:07.504702', 'step': 17198, 'epoch': 3} {'type': 'loss', 'content': 0.0876438245177269, 'timestamp': '2025-10-01 04:34:07.506834', 'step': 17199, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:34:07.536987', 'step': 17199, 'epoch': 3} {'type': 'loss', 'content': 0.10771494358778, 'timestamp': '2025-10-01 04:34:07.560609', 'step': 17200, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:07.591090', 'step': 17200, 'epoch': 3} {'type': 'loss', 'content': 0.09662853181362152, 'timestamp': '2025-10-01 04:34:07.593785', 'step': 17201, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:07.624806', 'step': 17201, 'epoch': 3} {'type': 'loss', 'content': 0.03959501162171364, 'timestamp': '2025-10-01 04:34:07.626937', 'step': 17202, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:07.656855', 'step': 17202, 'epoch': 3} {'type': 'loss', 'content': 0.08105939626693726, 'timestamp': '2025-10-01 04:34:07.659048', 'step': 17203, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:07.690020', 'step': 17203, 'epoch': 3} {'type': 'loss', 'content': 0.06873542815446854, 'timestamp': '2025-10-01 04:34:07.717389', 'step': 17204, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:07.748147', 'step': 17204, 'epoch': 3} {'type': 'loss', 'content': 0.06335072219371796, 'timestamp': '2025-10-01 04:34:07.750358', 'step': 17205, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:34:07.780937', 'step': 17205, 'epoch': 3} {'type': 'loss', 'content': 0.02494911290705204, 'timestamp': '2025-10-01 04:34:07.783301', 'step': 17206, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:34:07.814017', 'step': 17206, 'epoch': 3} {'type': 'loss', 'content': 0.03280043601989746, 'timestamp': '2025-10-01 04:34:07.819072', 'step': 17207, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:07.850028', 'step': 17207, 'epoch': 3} {'type': 'loss', 'content': 0.08551298826932907, 'timestamp': '2025-10-01 04:34:07.873739', 'step': 17208, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:34:07.904599', 'step': 17208, 'epoch': 3} {'type': 'loss', 'content': 0.095000721514225, 'timestamp': '2025-10-01 04:34:07.907126', 'step': 17209, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:34:07.937624', 'step': 17209, 'epoch': 3} {'type': 'loss', 'content': 0.047388654202222824, 'timestamp': '2025-10-01 04:34:07.939973', 'step': 17210, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:07.971146', 'step': 17210, 'epoch': 3} {'type': 'loss', 'content': 0.11515601724386215, 'timestamp': '2025-10-01 04:34:07.973213', 'step': 17211, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:08.004311', 'step': 17211, 'epoch': 3} {'type': 'loss', 'content': 0.09718641638755798, 'timestamp': '2025-10-01 04:34:08.028035', 'step': 17212, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:08.059817', 'step': 17212, 'epoch': 3} {'type': 'loss', 'content': 0.049250051379203796, 'timestamp': '2025-10-01 04:34:08.065847', 'step': 17213, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:08.098498', 'step': 17213, 'epoch': 3} {'type': 'loss', 'content': 0.14199621975421906, 'timestamp': '2025-10-01 04:34:08.100757', 'step': 17214, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:34:08.132044', 'step': 17214, 'epoch': 3} {'type': 'loss', 'content': 0.11883626878261566, 'timestamp': '2025-10-01 04:34:08.134399', 'step': 17215, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:08.165740', 'step': 17215, 'epoch': 3} {'type': 'loss', 'content': 0.008622153662145138, 'timestamp': '2025-10-01 04:34:08.189932', 'step': 17216, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:08.221688', 'step': 17216, 'epoch': 3} {'type': 'loss', 'content': 0.09952826797962189, 'timestamp': '2025-10-01 04:34:08.223870', 'step': 17217, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:34:08.254698', 'step': 17217, 'epoch': 3} {'type': 'loss', 'content': 0.0420440249145031, 'timestamp': '2025-10-01 04:34:08.257286', 'step': 17218, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:08.288496', 'step': 17218, 'epoch': 3} {'type': 'loss', 'content': 0.030532414093613625, 'timestamp': '2025-10-01 04:34:08.299744', 'step': 17219, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:34:08.332230', 'step': 17219, 'epoch': 3} {'type': 'loss', 'content': 0.07003186643123627, 'timestamp': '2025-10-01 04:34:08.355983', 'step': 17220, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:08.387275', 'step': 17220, 'epoch': 3} {'type': 'loss', 'content': 0.045140936970710754, 'timestamp': '2025-10-01 04:34:08.389383', 'step': 17221, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:08.427146', 'step': 17221, 'epoch': 3} {'type': 'loss', 'content': 0.01940871961414814, 'timestamp': '2025-10-01 04:34:08.429655', 'step': 17222, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:34:08.459686', 'step': 17222, 'epoch': 3} {'type': 'loss', 'content': 0.11795271933078766, 'timestamp': '2025-10-01 04:34:08.462086', 'step': 17223, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:08.493543', 'step': 17223, 'epoch': 3} {'type': 'loss', 'content': 0.09141476452350616, 'timestamp': '2025-10-01 04:34:08.518040', 'step': 17224, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:34:08.562384', 'step': 17224, 'epoch': 3} {'type': 'loss', 'content': 0.07616356015205383, 'timestamp': '2025-10-01 04:34:08.564538', 'step': 17225, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:08.599941', 'step': 17225, 'epoch': 3} {'type': 'loss', 'content': 0.06670787185430527, 'timestamp': '2025-10-01 04:34:08.602443', 'step': 17226, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:08.647707', 'step': 17226, 'epoch': 3} {'type': 'loss', 'content': 0.026787301525473595, 'timestamp': '2025-10-01 04:34:08.650115', 'step': 17227, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:08.685738', 'step': 17227, 'epoch': 3} {'type': 'loss', 'content': 0.10156520456075668, 'timestamp': '2025-10-01 04:34:08.709323', 'step': 17228, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:34:08.756152', 'step': 17228, 'epoch': 3} {'type': 'loss', 'content': 0.04755458980798721, 'timestamp': '2025-10-01 04:34:08.758365', 'step': 17229, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:08.804665', 'step': 17229, 'epoch': 3} {'type': 'loss', 'content': 0.1324428766965866, 'timestamp': '2025-10-01 04:34:08.816763', 'step': 17230, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:08.850526', 'step': 17230, 'epoch': 3} {'type': 'loss', 'content': 0.038637515157461166, 'timestamp': '2025-10-01 04:34:08.852749', 'step': 17231, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:08.884498', 'step': 17231, 'epoch': 3} {'type': 'loss', 'content': 0.039450738579034805, 'timestamp': '2025-10-01 04:34:08.920305', 'step': 17232, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:34:08.959318', 'step': 17232, 'epoch': 3} {'type': 'loss', 'content': 0.038424063473939896, 'timestamp': '2025-10-01 04:34:08.961387', 'step': 17233, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:08.999015', 'step': 17233, 'epoch': 3} {'type': 'loss', 'content': 0.08283249288797379, 'timestamp': '2025-10-01 04:34:09.001239', 'step': 17234, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:34:09.038416', 'step': 17234, 'epoch': 3} {'type': 'loss', 'content': 0.05585359036922455, 'timestamp': '2025-10-01 04:34:09.040720', 'step': 17235, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:34:09.072073', 'step': 17235, 'epoch': 3} {'type': 'loss', 'content': 0.04415340721607208, 'timestamp': '2025-10-01 04:34:09.095927', 'step': 17236, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:34:09.132841', 'step': 17236, 'epoch': 3} {'type': 'loss', 'content': 0.05200701951980591, 'timestamp': '2025-10-01 04:34:09.135014', 'step': 17237, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:09.166109', 'step': 17237, 'epoch': 3} {'type': 'loss', 'content': 0.05284561589360237, 'timestamp': '2025-10-01 04:34:09.168167', 'step': 17238, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:09.206245', 'step': 17238, 'epoch': 3} {'type': 'loss', 'content': 0.047521140426397324, 'timestamp': '2025-10-01 04:34:09.208326', 'step': 17239, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-10-01 04:34:09.241570', 'step': 17239, 'epoch': 3} {'type': 'loss', 'content': 0.010772288776934147, 'timestamp': '2025-10-01 04:34:09.267160', 'step': 17240, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:09.298798', 'step': 17240, 'epoch': 3} {'type': 'loss', 'content': 0.08150820434093475, 'timestamp': '2025-10-01 04:34:09.304523', 'step': 17241, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:09.336302', 'step': 17241, 'epoch': 3} {'type': 'loss', 'content': 0.030754296109080315, 'timestamp': '2025-10-01 04:34:09.338422', 'step': 17242, 'epoch': 3} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 949202279808}], 'timestamp': '2025-10-01 04:34:20.104647', 'step': 17242, 'epoch': 3} {'type': 'pplx', 'content': 12417.653552796266, 'timestamp': '2025-10-01 04:34:20.107612', 'step': 17242, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:20.140515', 'step': 17242, 'epoch': 3} {'type': 'loss', 'content': 0.06509445607662201, 'timestamp': '2025-10-01 04:34:20.142899', 'step': 17243, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:20.173813', 'step': 17243, 'epoch': 3} {'type': 'loss', 'content': 0.11475763469934464, 'timestamp': '2025-10-01 04:34:20.208025', 'step': 17244, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:34:20.239448', 'step': 17244, 'epoch': 3} {'type': 'loss', 'content': 0.10275273025035858, 'timestamp': '2025-10-01 04:34:20.241783', 'step': 17245, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:20.276286', 'step': 17245, 'epoch': 3} {'type': 'loss', 'content': 0.036197297275066376, 'timestamp': '2025-10-01 04:34:20.278644', 'step': 17246, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:20.313013', 'step': 17246, 'epoch': 3} {'type': 'loss', 'content': 0.058190301060676575, 'timestamp': '2025-10-01 04:34:20.315528', 'step': 17247, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:34:20.346086', 'step': 17247, 'epoch': 3} {'type': 'loss', 'content': 0.028467053547501564, 'timestamp': '2025-10-01 04:34:20.369861', 'step': 17248, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:20.404213', 'step': 17248, 'epoch': 3} {'type': 'loss', 'content': 0.09838131070137024, 'timestamp': '2025-10-01 04:34:20.406296', 'step': 17249, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:20.437227', 'step': 17249, 'epoch': 3} {'type': 'loss', 'content': 0.06933561712503433, 'timestamp': '2025-10-01 04:34:20.439629', 'step': 17250, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:20.480156', 'step': 17250, 'epoch': 3} {'type': 'loss', 'content': 0.046382855623960495, 'timestamp': '2025-10-01 04:34:20.482772', 'step': 17251, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:34:20.515062', 'step': 17251, 'epoch': 3} {'type': 'loss', 'content': 0.14211305975914001, 'timestamp': '2025-10-01 04:34:20.538896', 'step': 17252, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:34:20.574623', 'step': 17252, 'epoch': 3} {'type': 'loss', 'content': 0.07124772667884827, 'timestamp': '2025-10-01 04:34:20.576974', 'step': 17253, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:34:20.615188', 'step': 17253, 'epoch': 3} {'type': 'loss', 'content': 0.05262507125735283, 'timestamp': '2025-10-01 04:34:20.617480', 'step': 17254, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:20.654541', 'step': 17254, 'epoch': 3} {'type': 'loss', 'content': 0.07849539816379547, 'timestamp': '2025-10-01 04:34:20.657947', 'step': 17255, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:20.698960', 'step': 17255, 'epoch': 3} {'type': 'loss', 'content': 0.05016722157597542, 'timestamp': '2025-10-01 04:34:20.722889', 'step': 17256, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:34:20.756102', 'step': 17256, 'epoch': 3} {'type': 'loss', 'content': 0.06270093470811844, 'timestamp': '2025-10-01 04:34:20.759457', 'step': 17257, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:34:20.798252', 'step': 17257, 'epoch': 3} {'type': 'loss', 'content': 0.022169340401887894, 'timestamp': '2025-10-01 04:34:20.800500', 'step': 17258, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:34:20.832377', 'step': 17258, 'epoch': 3} {'type': 'loss', 'content': 0.029585232958197594, 'timestamp': '2025-10-01 04:34:20.835388', 'step': 17259, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:20.875211', 'step': 17259, 'epoch': 3} {'type': 'loss', 'content': 0.11855313181877136, 'timestamp': '2025-10-01 04:34:20.899289', 'step': 17260, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:34:20.937802', 'step': 17260, 'epoch': 3} {'type': 'loss', 'content': 0.042792562395334244, 'timestamp': '2025-10-01 04:34:20.940195', 'step': 17261, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:20.975700', 'step': 17261, 'epoch': 3} {'type': 'loss', 'content': 0.0967092216014862, 'timestamp': '2025-10-01 04:34:20.977932', 'step': 17262, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:21.012341', 'step': 17262, 'epoch': 3} {'type': 'loss', 'content': 0.08579723536968231, 'timestamp': '2025-10-01 04:34:21.014545', 'step': 17263, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:21.058853', 'step': 17263, 'epoch': 3} {'type': 'loss', 'content': 0.030512385070323944, 'timestamp': '2025-10-01 04:34:21.082802', 'step': 17264, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:21.115375', 'step': 17264, 'epoch': 3} {'type': 'loss', 'content': 0.07704794406890869, 'timestamp': '2025-10-01 04:34:21.117636', 'step': 17265, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:34:21.155089', 'step': 17265, 'epoch': 3} {'type': 'loss', 'content': 0.08135560899972916, 'timestamp': '2025-10-01 04:34:21.157546', 'step': 17266, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:21.201958', 'step': 17266, 'epoch': 3} {'type': 'loss', 'content': 0.01809651590883732, 'timestamp': '2025-10-01 04:34:21.204243', 'step': 17267, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:21.238621', 'step': 17267, 'epoch': 3} {'type': 'loss', 'content': 0.0631260946393013, 'timestamp': '2025-10-01 04:34:21.262632', 'step': 17268, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:21.305945', 'step': 17268, 'epoch': 3} {'type': 'loss', 'content': 0.10824239253997803, 'timestamp': '2025-10-01 04:34:21.308231', 'step': 17269, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:34:21.352240', 'step': 17269, 'epoch': 3} {'type': 'loss', 'content': 0.07238295674324036, 'timestamp': '2025-10-01 04:34:21.354573', 'step': 17270, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:21.392165', 'step': 17270, 'epoch': 3} {'type': 'loss', 'content': 0.09912259876728058, 'timestamp': '2025-10-01 04:34:21.394620', 'step': 17271, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:21.428237', 'step': 17271, 'epoch': 3} {'type': 'loss', 'content': 0.04174153134226799, 'timestamp': '2025-10-01 04:34:21.452168', 'step': 17272, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:21.485316', 'step': 17272, 'epoch': 3} {'type': 'loss', 'content': 0.030537564307451248, 'timestamp': '2025-10-01 04:34:21.487740', 'step': 17273, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:34:21.520860', 'step': 17273, 'epoch': 3} {'type': 'loss', 'content': 0.0133687574416399, 'timestamp': '2025-10-01 04:34:21.523283', 'step': 17274, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:21.557884', 'step': 17274, 'epoch': 3} {'type': 'loss', 'content': 0.036177169531583786, 'timestamp': '2025-10-01 04:34:21.560221', 'step': 17275, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:21.598699', 'step': 17275, 'epoch': 3} {'type': 'loss', 'content': 0.022764181718230247, 'timestamp': '2025-10-01 04:34:21.622753', 'step': 17276, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:21.654516', 'step': 17276, 'epoch': 3} {'type': 'loss', 'content': 0.07503598183393478, 'timestamp': '2025-10-01 04:34:21.664271', 'step': 17277, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:34:21.697507', 'step': 17277, 'epoch': 3} {'type': 'loss', 'content': 0.05722912400960922, 'timestamp': '2025-10-01 04:34:21.700328', 'step': 17278, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:34:21.734388', 'step': 17278, 'epoch': 3} {'type': 'loss', 'content': 0.09106064587831497, 'timestamp': '2025-10-01 04:34:21.736752', 'step': 17279, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:21.772537', 'step': 17279, 'epoch': 3} {'type': 'loss', 'content': 0.10438810288906097, 'timestamp': '2025-10-01 04:34:21.796698', 'step': 17280, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:21.836710', 'step': 17280, 'epoch': 3} {'type': 'loss', 'content': 0.05184359848499298, 'timestamp': '2025-10-01 04:34:21.839250', 'step': 17281, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:21.874854', 'step': 17281, 'epoch': 3} {'type': 'loss', 'content': 0.038599539548158646, 'timestamp': '2025-10-01 04:34:21.877274', 'step': 17282, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:34:21.911908', 'step': 17282, 'epoch': 3} {'type': 'loss', 'content': 0.10606649518013, 'timestamp': '2025-10-01 04:34:21.914138', 'step': 17283, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:34:21.945973', 'step': 17283, 'epoch': 3} {'type': 'loss', 'content': 0.15519383549690247, 'timestamp': '2025-10-01 04:34:21.971408', 'step': 17284, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:22.007887', 'step': 17284, 'epoch': 3} {'type': 'loss', 'content': 0.07197486609220505, 'timestamp': '2025-10-01 04:34:22.010164', 'step': 17285, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:34:22.042733', 'step': 17285, 'epoch': 3} {'type': 'loss', 'content': 0.09019642323255539, 'timestamp': '2025-10-01 04:34:22.045141', 'step': 17286, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:34:22.077166', 'step': 17286, 'epoch': 3} {'type': 'loss', 'content': 0.13076630234718323, 'timestamp': '2025-10-01 04:34:22.080036', 'step': 17287, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:22.112215', 'step': 17287, 'epoch': 3} {'type': 'loss', 'content': 0.017093919217586517, 'timestamp': '2025-10-01 04:34:22.136060', 'step': 17288, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:22.168366', 'step': 17288, 'epoch': 3} {'type': 'loss', 'content': 0.09052444249391556, 'timestamp': '2025-10-01 04:34:22.170745', 'step': 17289, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:34:22.208434', 'step': 17289, 'epoch': 3} {'type': 'loss', 'content': 0.043917424976825714, 'timestamp': '2025-10-01 04:34:22.210947', 'step': 17290, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:22.245087', 'step': 17290, 'epoch': 3} {'type': 'loss', 'content': 0.03478918597102165, 'timestamp': '2025-10-01 04:34:22.247405', 'step': 17291, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:22.278162', 'step': 17291, 'epoch': 3} {'type': 'loss', 'content': 0.05949360504746437, 'timestamp': '2025-10-01 04:34:22.301978', 'step': 17292, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:22.346681', 'step': 17292, 'epoch': 3} {'type': 'loss', 'content': 0.09128764271736145, 'timestamp': '2025-10-01 04:34:22.349523', 'step': 17293, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:22.385026', 'step': 17293, 'epoch': 3} {'type': 'loss', 'content': 0.05715678632259369, 'timestamp': '2025-10-01 04:34:22.387456', 'step': 17294, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:22.425174', 'step': 17294, 'epoch': 3} {'type': 'loss', 'content': 0.05221021920442581, 'timestamp': '2025-10-01 04:34:22.427470', 'step': 17295, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:34:22.463447', 'step': 17295, 'epoch': 3} {'type': 'loss', 'content': 0.09906499087810516, 'timestamp': '2025-10-01 04:34:22.487399', 'step': 17296, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:22.522394', 'step': 17296, 'epoch': 3} {'type': 'loss', 'content': 0.05487706512212753, 'timestamp': '2025-10-01 04:34:22.524660', 'step': 17297, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:22.557382', 'step': 17297, 'epoch': 3} {'type': 'loss', 'content': 0.058272507041692734, 'timestamp': '2025-10-01 04:34:22.559659', 'step': 17298, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:22.605115', 'step': 17298, 'epoch': 3} {'type': 'loss', 'content': 0.03205638378858566, 'timestamp': '2025-10-01 04:34:22.607230', 'step': 17299, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:22.646381', 'step': 17299, 'epoch': 3} {'type': 'loss', 'content': 0.046748388558626175, 'timestamp': '2025-10-01 04:34:22.670252', 'step': 17300, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:22.702795', 'step': 17300, 'epoch': 3} {'type': 'loss', 'content': 0.08243677020072937, 'timestamp': '2025-10-01 04:34:22.705003', 'step': 17301, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:34:22.739035', 'step': 17301, 'epoch': 3} {'type': 'loss', 'content': 0.06966372579336166, 'timestamp': '2025-10-01 04:34:22.741535', 'step': 17302, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:22.775209', 'step': 17302, 'epoch': 3} {'type': 'loss', 'content': 0.07637261599302292, 'timestamp': '2025-10-01 04:34:22.777735', 'step': 17303, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:34:22.809148', 'step': 17303, 'epoch': 3} {'type': 'loss', 'content': 0.10302641987800598, 'timestamp': '2025-10-01 04:34:22.835121', 'step': 17304, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:34:22.866916', 'step': 17304, 'epoch': 3} {'type': 'loss', 'content': 0.02481374330818653, 'timestamp': '2025-10-01 04:34:22.869400', 'step': 17305, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:34:22.900467', 'step': 17305, 'epoch': 3} {'type': 'loss', 'content': 0.06022433191537857, 'timestamp': '2025-10-01 04:34:22.903003', 'step': 17306, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:22.934518', 'step': 17306, 'epoch': 3} {'type': 'loss', 'content': 0.09511399269104004, 'timestamp': '2025-10-01 04:34:22.936894', 'step': 17307, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:34:22.967911', 'step': 17307, 'epoch': 3} {'type': 'loss', 'content': 0.011507687158882618, 'timestamp': '2025-10-01 04:34:22.991995', 'step': 17308, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:23.024347', 'step': 17308, 'epoch': 3} {'type': 'loss', 'content': 0.09031455218791962, 'timestamp': '2025-10-01 04:34:23.026668', 'step': 17309, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:23.065962', 'step': 17309, 'epoch': 3} {'type': 'loss', 'content': 0.08891157060861588, 'timestamp': '2025-10-01 04:34:23.068169', 'step': 17310, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:34:23.100597', 'step': 17310, 'epoch': 3} {'type': 'loss', 'content': 0.12381837517023087, 'timestamp': '2025-10-01 04:34:23.103207', 'step': 17311, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:23.136845', 'step': 17311, 'epoch': 3} {'type': 'loss', 'content': 0.10560011118650436, 'timestamp': '2025-10-01 04:34:23.160624', 'step': 17312, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:23.197214', 'step': 17312, 'epoch': 3} {'type': 'loss', 'content': 0.09370949864387512, 'timestamp': '2025-10-01 04:34:23.199497', 'step': 17313, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:23.234447', 'step': 17313, 'epoch': 3} {'type': 'loss', 'content': 0.04130746051669121, 'timestamp': '2025-10-01 04:34:23.236775', 'step': 17314, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:23.270700', 'step': 17314, 'epoch': 3} {'type': 'loss', 'content': 0.10775631666183472, 'timestamp': '2025-10-01 04:34:23.273003', 'step': 17315, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:23.306614', 'step': 17315, 'epoch': 3} {'type': 'loss', 'content': 0.0806196928024292, 'timestamp': '2025-10-01 04:34:23.330504', 'step': 17316, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:23.369578', 'step': 17316, 'epoch': 3} {'type': 'loss', 'content': 0.030525997281074524, 'timestamp': '2025-10-01 04:34:23.371868', 'step': 17317, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:34:23.402951', 'step': 17317, 'epoch': 3} {'type': 'loss', 'content': 0.02648991346359253, 'timestamp': '2025-10-01 04:34:23.405263', 'step': 17318, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:23.435786', 'step': 17318, 'epoch': 3} {'type': 'loss', 'content': 0.11151262372732162, 'timestamp': '2025-10-01 04:34:23.439496', 'step': 17319, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:34:23.473365', 'step': 17319, 'epoch': 3} {'type': 'loss', 'content': 0.04282088950276375, 'timestamp': '2025-10-01 04:34:23.497127', 'step': 17320, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:23.530422', 'step': 17320, 'epoch': 3} {'type': 'loss', 'content': 0.054648857563734055, 'timestamp': '2025-10-01 04:34:23.533652', 'step': 17321, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:23.565497', 'step': 17321, 'epoch': 3} {'type': 'loss', 'content': 0.05709722638130188, 'timestamp': '2025-10-01 04:34:23.568432', 'step': 17322, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:23.602715', 'step': 17322, 'epoch': 3} {'type': 'loss', 'content': 0.033494967967271805, 'timestamp': '2025-10-01 04:34:23.605862', 'step': 17323, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:23.637160', 'step': 17323, 'epoch': 3} {'type': 'loss', 'content': 0.05333369970321655, 'timestamp': '2025-10-01 04:34:23.661027', 'step': 17324, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:23.697087', 'step': 17324, 'epoch': 3} {'type': 'loss', 'content': 0.028260953724384308, 'timestamp': '2025-10-01 04:34:23.699222', 'step': 17325, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:23.733141', 'step': 17325, 'epoch': 3} {'type': 'loss', 'content': 0.04556186869740486, 'timestamp': '2025-10-01 04:34:23.735659', 'step': 17326, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:23.778305', 'step': 17326, 'epoch': 3} {'type': 'loss', 'content': 0.05117451027035713, 'timestamp': '2025-10-01 04:34:23.781366', 'step': 17327, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:34:23.814766', 'step': 17327, 'epoch': 3} {'type': 'loss', 'content': 0.03760030120611191, 'timestamp': '2025-10-01 04:34:23.838537', 'step': 17328, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:34:23.872802', 'step': 17328, 'epoch': 3} {'type': 'loss', 'content': 0.13250821828842163, 'timestamp': '2025-10-01 04:34:23.875151', 'step': 17329, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:34:23.906687', 'step': 17329, 'epoch': 3} {'type': 'loss', 'content': 0.08065656572580338, 'timestamp': '2025-10-01 04:34:23.911043', 'step': 17330, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:23.946317', 'step': 17330, 'epoch': 3} {'type': 'loss', 'content': 0.11497252434492111, 'timestamp': '2025-10-01 04:34:23.948639', 'step': 17331, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:34:23.983181', 'step': 17331, 'epoch': 3} {'type': 'loss', 'content': 0.09762749820947647, 'timestamp': '2025-10-01 04:34:24.007010', 'step': 17332, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:24.039681', 'step': 17332, 'epoch': 3} {'type': 'loss', 'content': 0.035005856305360794, 'timestamp': '2025-10-01 04:34:24.041853', 'step': 17333, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:34:24.072130', 'step': 17333, 'epoch': 3} {'type': 'loss', 'content': 0.07467224448919296, 'timestamp': '2025-10-01 04:34:24.074510', 'step': 17334, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:24.112798', 'step': 17334, 'epoch': 3} {'type': 'loss', 'content': 0.04836415499448776, 'timestamp': '2025-10-01 04:34:24.115166', 'step': 17335, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:34:24.147028', 'step': 17335, 'epoch': 3} {'type': 'loss', 'content': 0.12580746412277222, 'timestamp': '2025-10-01 04:34:24.170741', 'step': 17336, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:34:24.207007', 'step': 17336, 'epoch': 3} {'type': 'loss', 'content': 0.09405813366174698, 'timestamp': '2025-10-01 04:34:24.209291', 'step': 17337, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:24.241751', 'step': 17337, 'epoch': 3} {'type': 'loss', 'content': 0.08991090953350067, 'timestamp': '2025-10-01 04:34:24.244175', 'step': 17338, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:24.278312', 'step': 17338, 'epoch': 3} {'type': 'loss', 'content': 0.07692930102348328, 'timestamp': '2025-10-01 04:34:24.283408', 'step': 17339, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:24.322742', 'step': 17339, 'epoch': 3} {'type': 'loss', 'content': 0.09895442426204681, 'timestamp': '2025-10-01 04:34:24.346459', 'step': 17340, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:34:24.380222', 'step': 17340, 'epoch': 3} {'type': 'loss', 'content': 0.06764287501573563, 'timestamp': '2025-10-01 04:34:24.383361', 'step': 17341, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:24.425706', 'step': 17341, 'epoch': 3} {'type': 'loss', 'content': 0.08285769075155258, 'timestamp': '2025-10-01 04:34:24.434879', 'step': 17342, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:34:24.471055', 'step': 17342, 'epoch': 3} {'type': 'loss', 'content': 0.09945517033338547, 'timestamp': '2025-10-01 04:34:24.473552', 'step': 17343, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:24.506552', 'step': 17343, 'epoch': 3} {'type': 'loss', 'content': 0.04892100393772125, 'timestamp': '2025-10-01 04:34:24.530412', 'step': 17344, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:24.563400', 'step': 17344, 'epoch': 3} {'type': 'loss', 'content': 0.12670227885246277, 'timestamp': '2025-10-01 04:34:24.569037', 'step': 17345, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:24.599503', 'step': 17345, 'epoch': 3} {'type': 'loss', 'content': 0.06512797623872757, 'timestamp': '2025-10-01 04:34:24.601851', 'step': 17346, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:24.639641', 'step': 17346, 'epoch': 3} {'type': 'loss', 'content': 0.053410351276397705, 'timestamp': '2025-10-01 04:34:24.642136', 'step': 17347, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:34:24.678427', 'step': 17347, 'epoch': 3} {'type': 'loss', 'content': 0.06824803352355957, 'timestamp': '2025-10-01 04:34:24.704689', 'step': 17348, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:24.738194', 'step': 17348, 'epoch': 3} {'type': 'loss', 'content': 0.038553863763809204, 'timestamp': '2025-10-01 04:34:24.740840', 'step': 17349, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:34:24.771881', 'step': 17349, 'epoch': 3} {'type': 'loss', 'content': 0.06462450325489044, 'timestamp': '2025-10-01 04:34:24.774352', 'step': 17350, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:24.812204', 'step': 17350, 'epoch': 3} {'type': 'loss', 'content': 0.13376760482788086, 'timestamp': '2025-10-01 04:34:24.814777', 'step': 17351, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:24.846985', 'step': 17351, 'epoch': 3} {'type': 'loss', 'content': 0.06791377067565918, 'timestamp': '2025-10-01 04:34:24.871085', 'step': 17352, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:24.912738', 'step': 17352, 'epoch': 3} {'type': 'loss', 'content': 0.08395934104919434, 'timestamp': '2025-10-01 04:34:24.917578', 'step': 17353, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:34:24.952099', 'step': 17353, 'epoch': 3} {'type': 'loss', 'content': 0.06449347734451294, 'timestamp': '2025-10-01 04:34:24.954733', 'step': 17354, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:24.988211', 'step': 17354, 'epoch': 3} {'type': 'loss', 'content': 0.1458405703306198, 'timestamp': '2025-10-01 04:34:24.990937', 'step': 17355, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:25.022052', 'step': 17355, 'epoch': 3} {'type': 'loss', 'content': 0.07195146381855011, 'timestamp': '2025-10-01 04:34:25.046310', 'step': 17356, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:25.077070', 'step': 17356, 'epoch': 3} {'type': 'loss', 'content': 0.04110424220561981, 'timestamp': '2025-10-01 04:34:25.079612', 'step': 17357, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:25.117063', 'step': 17357, 'epoch': 3} {'type': 'loss', 'content': 0.08274636417627335, 'timestamp': '2025-10-01 04:34:25.119361', 'step': 17358, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:25.151589', 'step': 17358, 'epoch': 3} {'type': 'loss', 'content': 0.0888151228427887, 'timestamp': '2025-10-01 04:34:25.154183', 'step': 17359, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:25.190949', 'step': 17359, 'epoch': 3} {'type': 'loss', 'content': 0.11325960606336594, 'timestamp': '2025-10-01 04:34:25.215565', 'step': 17360, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:25.254136', 'step': 17360, 'epoch': 3} {'type': 'loss', 'content': 0.09389282017946243, 'timestamp': '2025-10-01 04:34:25.258627', 'step': 17361, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:25.291960', 'step': 17361, 'epoch': 3} {'type': 'loss', 'content': 0.09387578815221786, 'timestamp': '2025-10-01 04:34:25.308952', 'step': 17362, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:34:25.344554', 'step': 17362, 'epoch': 3} {'type': 'loss', 'content': 0.0639973133802414, 'timestamp': '2025-10-01 04:34:25.350113', 'step': 17363, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:25.387275', 'step': 17363, 'epoch': 3} {'type': 'loss', 'content': 0.08033692836761475, 'timestamp': '2025-10-01 04:34:25.411601', 'step': 17364, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:34:25.444737', 'step': 17364, 'epoch': 3} {'type': 'loss', 'content': 0.002391072688624263, 'timestamp': '2025-10-01 04:34:25.447944', 'step': 17365, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:25.482356', 'step': 17365, 'epoch': 3} {'type': 'loss', 'content': 0.0917036160826683, 'timestamp': '2025-10-01 04:34:25.489086', 'step': 17366, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:34:25.530007', 'step': 17366, 'epoch': 3} {'type': 'loss', 'content': 0.012329135090112686, 'timestamp': '2025-10-01 04:34:25.532669', 'step': 17367, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:25.565427', 'step': 17367, 'epoch': 3} {'type': 'loss', 'content': 0.0918487012386322, 'timestamp': '2025-10-01 04:34:25.589807', 'step': 17368, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:25.636116', 'step': 17368, 'epoch': 3} {'type': 'loss', 'content': 0.04625800997018814, 'timestamp': '2025-10-01 04:34:25.638638', 'step': 17369, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:34:25.671260', 'step': 17369, 'epoch': 3} {'type': 'loss', 'content': 0.06731531023979187, 'timestamp': '2025-10-01 04:34:25.673769', 'step': 17370, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:25.706787', 'step': 17370, 'epoch': 3} {'type': 'loss', 'content': 0.03607609495520592, 'timestamp': '2025-10-01 04:34:25.709196', 'step': 17371, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:25.742920', 'step': 17371, 'epoch': 3} {'type': 'loss', 'content': 0.07554639875888824, 'timestamp': '2025-10-01 04:34:25.767606', 'step': 17372, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:25.800833', 'step': 17372, 'epoch': 3} {'type': 'loss', 'content': 0.14511172473430634, 'timestamp': '2025-10-01 04:34:25.804988', 'step': 17373, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:25.849011', 'step': 17373, 'epoch': 3} {'type': 'loss', 'content': 0.019446296617388725, 'timestamp': '2025-10-01 04:34:25.856765', 'step': 17374, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:25.889627', 'step': 17374, 'epoch': 3} {'type': 'loss', 'content': 0.07961861789226532, 'timestamp': '2025-10-01 04:34:25.893278', 'step': 17375, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:34:25.925300', 'step': 17375, 'epoch': 3} {'type': 'loss', 'content': 0.06841123104095459, 'timestamp': '2025-10-01 04:34:25.949506', 'step': 17376, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:25.995844', 'step': 17376, 'epoch': 3} {'type': 'loss', 'content': 0.03846001252532005, 'timestamp': '2025-10-01 04:34:25.998375', 'step': 17377, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:26.029612', 'step': 17377, 'epoch': 3} {'type': 'loss', 'content': 0.06664029508829117, 'timestamp': '2025-10-01 04:34:26.032404', 'step': 17378, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:26.063691', 'step': 17378, 'epoch': 3} {'type': 'loss', 'content': 0.04675464332103729, 'timestamp': '2025-10-01 04:34:26.066440', 'step': 17379, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:26.100410', 'step': 17379, 'epoch': 3} {'type': 'loss', 'content': 0.07292541116476059, 'timestamp': '2025-10-01 04:34:26.125372', 'step': 17380, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:26.157255', 'step': 17380, 'epoch': 3} {'type': 'loss', 'content': 0.06691072136163712, 'timestamp': '2025-10-01 04:34:26.159897', 'step': 17381, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:26.191940', 'step': 17381, 'epoch': 3} {'type': 'loss', 'content': 0.05895441398024559, 'timestamp': '2025-10-01 04:34:26.194241', 'step': 17382, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:26.227578', 'step': 17382, 'epoch': 3} {'type': 'loss', 'content': 0.11460212618112564, 'timestamp': '2025-10-01 04:34:26.230429', 'step': 17383, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:26.263031', 'step': 17383, 'epoch': 3} {'type': 'loss', 'content': 0.04728013277053833, 'timestamp': '2025-10-01 04:34:26.287200', 'step': 17384, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:34:26.320256', 'step': 17384, 'epoch': 3} {'type': 'loss', 'content': 0.09152568876743317, 'timestamp': '2025-10-01 04:34:26.322608', 'step': 17385, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:26.356246', 'step': 17385, 'epoch': 3} {'type': 'loss', 'content': 0.04397265985608101, 'timestamp': '2025-10-01 04:34:26.358896', 'step': 17386, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:26.391180', 'step': 17386, 'epoch': 3} {'type': 'loss', 'content': 0.030447475612163544, 'timestamp': '2025-10-01 04:34:26.393569', 'step': 17387, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:34:26.432967', 'step': 17387, 'epoch': 3} {'type': 'loss', 'content': 0.1418904960155487, 'timestamp': '2025-10-01 04:34:26.456741', 'step': 17388, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:34:26.494801', 'step': 17388, 'epoch': 3} {'type': 'loss', 'content': 0.05345331132411957, 'timestamp': '2025-10-01 04:34:26.497107', 'step': 17389, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:26.528806', 'step': 17389, 'epoch': 3} {'type': 'loss', 'content': 0.2132081240415573, 'timestamp': '2025-10-01 04:34:26.531572', 'step': 17390, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:26.577911', 'step': 17390, 'epoch': 3} {'type': 'loss', 'content': 0.08325621485710144, 'timestamp': '2025-10-01 04:34:26.580454', 'step': 17391, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:26.615920', 'step': 17391, 'epoch': 3} {'type': 'loss', 'content': 0.10675482451915741, 'timestamp': '2025-10-01 04:34:26.642768', 'step': 17392, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:26.677201', 'step': 17392, 'epoch': 3} {'type': 'loss', 'content': 0.046055860817432404, 'timestamp': '2025-10-01 04:34:26.679542', 'step': 17393, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:26.717463', 'step': 17393, 'epoch': 3} {'type': 'loss', 'content': 0.114154152572155, 'timestamp': '2025-10-01 04:34:26.720192', 'step': 17394, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:34:26.774033', 'step': 17394, 'epoch': 3} {'type': 'loss', 'content': 0.023160617798566818, 'timestamp': '2025-10-01 04:34:26.777158', 'step': 17395, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:34:26.818227', 'step': 17395, 'epoch': 3} {'type': 'loss', 'content': 0.10284377634525299, 'timestamp': '2025-10-01 04:34:26.842027', 'step': 17396, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:34:26.873044', 'step': 17396, 'epoch': 3} {'type': 'loss', 'content': 0.0784527137875557, 'timestamp': '2025-10-01 04:34:26.875628', 'step': 17397, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:34:26.909301', 'step': 17397, 'epoch': 3} {'type': 'loss', 'content': 0.026033926755189896, 'timestamp': '2025-10-01 04:34:26.915791', 'step': 17398, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:26.948854', 'step': 17398, 'epoch': 3} {'type': 'loss', 'content': 0.03921962529420853, 'timestamp': '2025-10-01 04:34:26.951414', 'step': 17399, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:26.987405', 'step': 17399, 'epoch': 3} {'type': 'loss', 'content': 0.04124119505286217, 'timestamp': '2025-10-01 04:34:27.011202', 'step': 17400, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:34:27.046179', 'step': 17400, 'epoch': 3} {'type': 'loss', 'content': 0.10537704825401306, 'timestamp': '2025-10-01 04:34:27.048501', 'step': 17401, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:34:27.085620', 'step': 17401, 'epoch': 3} {'type': 'loss', 'content': 0.04366470128297806, 'timestamp': '2025-10-01 04:34:27.088877', 'step': 17402, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:27.121405', 'step': 17402, 'epoch': 3} {'type': 'loss', 'content': 0.12515559792518616, 'timestamp': '2025-10-01 04:34:27.124650', 'step': 17403, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:27.156926', 'step': 17403, 'epoch': 3} {'type': 'loss', 'content': 0.08837146311998367, 'timestamp': '2025-10-01 04:34:27.180713', 'step': 17404, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:34:27.213573', 'step': 17404, 'epoch': 3} {'type': 'loss', 'content': 0.06915701925754547, 'timestamp': '2025-10-01 04:34:27.216509', 'step': 17405, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:27.249283', 'step': 17405, 'epoch': 3} {'type': 'loss', 'content': 0.06560521572828293, 'timestamp': '2025-10-01 04:34:27.251770', 'step': 17406, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:27.286541', 'step': 17406, 'epoch': 3} {'type': 'loss', 'content': 0.05965625122189522, 'timestamp': '2025-10-01 04:34:27.290206', 'step': 17407, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:34:27.326335', 'step': 17407, 'epoch': 3} {'type': 'loss', 'content': 0.03528755158185959, 'timestamp': '2025-10-01 04:34:27.351141', 'step': 17408, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:27.390200', 'step': 17408, 'epoch': 3} {'type': 'loss', 'content': 0.02608843706548214, 'timestamp': '2025-10-01 04:34:27.393029', 'step': 17409, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:34:27.433013', 'step': 17409, 'epoch': 3} {'type': 'loss', 'content': 0.04233913868665695, 'timestamp': '2025-10-01 04:34:27.435370', 'step': 17410, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:34:27.482608', 'step': 17410, 'epoch': 3} {'type': 'loss', 'content': 0.10889825224876404, 'timestamp': '2025-10-01 04:34:27.489971', 'step': 17411, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:27.529845', 'step': 17411, 'epoch': 3} {'type': 'loss', 'content': 0.10585594177246094, 'timestamp': '2025-10-01 04:34:27.554015', 'step': 17412, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:34:27.591085', 'step': 17412, 'epoch': 3} {'type': 'loss', 'content': 0.017575949430465698, 'timestamp': '2025-10-01 04:34:27.593459', 'step': 17413, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:27.629620', 'step': 17413, 'epoch': 3} {'type': 'loss', 'content': 0.1389392763376236, 'timestamp': '2025-10-01 04:34:27.632562', 'step': 17414, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:27.665285', 'step': 17414, 'epoch': 3} {'type': 'loss', 'content': 0.10081247240304947, 'timestamp': '2025-10-01 04:34:27.668189', 'step': 17415, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:34:27.701543', 'step': 17415, 'epoch': 3} {'type': 'loss', 'content': 0.07357293367385864, 'timestamp': '2025-10-01 04:34:27.725929', 'step': 17416, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:27.757162', 'step': 17416, 'epoch': 3} {'type': 'loss', 'content': 0.038732338696718216, 'timestamp': '2025-10-01 04:34:27.759385', 'step': 17417, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:34:27.793767', 'step': 17417, 'epoch': 3} {'type': 'loss', 'content': 0.10162326693534851, 'timestamp': '2025-10-01 04:34:27.796624', 'step': 17418, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:27.830582', 'step': 17418, 'epoch': 3} {'type': 'loss', 'content': 0.03153632581233978, 'timestamp': '2025-10-01 04:34:27.832817', 'step': 17419, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:34:27.867131', 'step': 17419, 'epoch': 3} {'type': 'loss', 'content': 0.06971661746501923, 'timestamp': '2025-10-01 04:34:27.891889', 'step': 17420, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:27.925207', 'step': 17420, 'epoch': 3} {'type': 'loss', 'content': 0.10566820204257965, 'timestamp': '2025-10-01 04:34:27.933147', 'step': 17421, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:27.973334', 'step': 17421, 'epoch': 3} {'type': 'loss', 'content': 0.07895785570144653, 'timestamp': '2025-10-01 04:34:27.975635', 'step': 17422, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:28.011369', 'step': 17422, 'epoch': 3} {'type': 'loss', 'content': 0.04314721003174782, 'timestamp': '2025-10-01 04:34:28.014189', 'step': 17423, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:34:28.045682', 'step': 17423, 'epoch': 3} {'type': 'loss', 'content': 0.0366678386926651, 'timestamp': '2025-10-01 04:34:28.070792', 'step': 17424, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:34:28.108498', 'step': 17424, 'epoch': 3} {'type': 'loss', 'content': 0.07029063999652863, 'timestamp': '2025-10-01 04:34:28.112179', 'step': 17425, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:28.145760', 'step': 17425, 'epoch': 3} {'type': 'loss', 'content': 0.02208208665251732, 'timestamp': '2025-10-01 04:34:28.148936', 'step': 17426, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:28.184412', 'step': 17426, 'epoch': 3} {'type': 'loss', 'content': 0.03274525701999664, 'timestamp': '2025-10-01 04:34:28.187206', 'step': 17427, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:28.224607', 'step': 17427, 'epoch': 3} {'type': 'loss', 'content': 0.14228248596191406, 'timestamp': '2025-10-01 04:34:28.248374', 'step': 17428, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:34:28.288021', 'step': 17428, 'epoch': 3} {'type': 'loss', 'content': 0.028077220544219017, 'timestamp': '2025-10-01 04:34:28.291793', 'step': 17429, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:28.325682', 'step': 17429, 'epoch': 3} {'type': 'loss', 'content': 0.04088262468576431, 'timestamp': '2025-10-01 04:34:28.327913', 'step': 17430, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:28.362472', 'step': 17430, 'epoch': 3} {'type': 'loss', 'content': 0.0634128600358963, 'timestamp': '2025-10-01 04:34:28.365525', 'step': 17431, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:34:28.397695', 'step': 17431, 'epoch': 3} {'type': 'loss', 'content': 0.03088403306901455, 'timestamp': '2025-10-01 04:34:28.421466', 'step': 17432, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:28.453846', 'step': 17432, 'epoch': 3} {'type': 'loss', 'content': 0.14095574617385864, 'timestamp': '2025-10-01 04:34:28.456848', 'step': 17433, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:28.497679', 'step': 17433, 'epoch': 3} {'type': 'loss', 'content': 0.05925709754228592, 'timestamp': '2025-10-01 04:34:28.499850', 'step': 17434, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:28.535971', 'step': 17434, 'epoch': 3} {'type': 'loss', 'content': 0.09563123434782028, 'timestamp': '2025-10-01 04:34:28.538347', 'step': 17435, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:28.570416', 'step': 17435, 'epoch': 3} {'type': 'loss', 'content': 0.05411389470100403, 'timestamp': '2025-10-01 04:34:28.595309', 'step': 17436, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:28.638035', 'step': 17436, 'epoch': 3} {'type': 'loss', 'content': 0.062815822660923, 'timestamp': '2025-10-01 04:34:28.640332', 'step': 17437, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:28.671262', 'step': 17437, 'epoch': 3} {'type': 'loss', 'content': 0.038976479321718216, 'timestamp': '2025-10-01 04:34:28.673528', 'step': 17438, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:34:28.704846', 'step': 17438, 'epoch': 3} {'type': 'loss', 'content': 0.07697925716638565, 'timestamp': '2025-10-01 04:34:28.707436', 'step': 17439, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:28.738841', 'step': 17439, 'epoch': 3} {'type': 'loss', 'content': 0.057939451187849045, 'timestamp': '2025-10-01 04:34:28.762783', 'step': 17440, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:34:28.798512', 'step': 17440, 'epoch': 3} {'type': 'loss', 'content': 0.09616150707006454, 'timestamp': '2025-10-01 04:34:28.806931', 'step': 17441, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:28.842114', 'step': 17441, 'epoch': 3} {'type': 'loss', 'content': 0.06692840158939362, 'timestamp': '2025-10-01 04:34:28.844430', 'step': 17442, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:34:28.879167', 'step': 17442, 'epoch': 3} {'type': 'loss', 'content': 0.05729054659605026, 'timestamp': '2025-10-01 04:34:28.883071', 'step': 17443, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:28.918058', 'step': 17443, 'epoch': 3} {'type': 'loss', 'content': 0.04896446317434311, 'timestamp': '2025-10-01 04:34:28.941698', 'step': 17444, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:28.986283', 'step': 17444, 'epoch': 3} {'type': 'loss', 'content': 0.02999398671090603, 'timestamp': '2025-10-01 04:34:28.988643', 'step': 17445, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:29.031641', 'step': 17445, 'epoch': 3} {'type': 'loss', 'content': 0.0453314371407032, 'timestamp': '2025-10-01 04:34:29.033918', 'step': 17446, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:29.066021', 'step': 17446, 'epoch': 3} {'type': 'loss', 'content': 0.038910433650016785, 'timestamp': '2025-10-01 04:34:29.068887', 'step': 17447, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:34:29.102916', 'step': 17447, 'epoch': 3} {'type': 'loss', 'content': 0.09609133005142212, 'timestamp': '2025-10-01 04:34:29.127002', 'step': 17448, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:34:29.158923', 'step': 17448, 'epoch': 3} {'type': 'loss', 'content': 0.07343174517154694, 'timestamp': '2025-10-01 04:34:29.161350', 'step': 17449, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:29.200155', 'step': 17449, 'epoch': 3} {'type': 'loss', 'content': 0.09185484051704407, 'timestamp': '2025-10-01 04:34:29.202548', 'step': 17450, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:34:29.235080', 'step': 17450, 'epoch': 3} {'type': 'loss', 'content': 0.10464176535606384, 'timestamp': '2025-10-01 04:34:29.237449', 'step': 17451, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:34:29.269351', 'step': 17451, 'epoch': 3} {'type': 'loss', 'content': 0.07485528290271759, 'timestamp': '2025-10-01 04:34:29.293066', 'step': 17452, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:34:29.335895', 'step': 17452, 'epoch': 3} {'type': 'loss', 'content': 0.06446990370750427, 'timestamp': '2025-10-01 04:34:29.338202', 'step': 17453, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:34:29.372008', 'step': 17453, 'epoch': 3} {'type': 'loss', 'content': 0.07068667560815811, 'timestamp': '2025-10-01 04:34:29.374232', 'step': 17454, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:29.408173', 'step': 17454, 'epoch': 3} {'type': 'loss', 'content': 0.14652173221111298, 'timestamp': '2025-10-01 04:34:29.410713', 'step': 17455, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:29.445054', 'step': 17455, 'epoch': 3} {'type': 'loss', 'content': 0.0858515128493309, 'timestamp': '2025-10-01 04:34:29.469498', 'step': 17456, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:29.501860', 'step': 17456, 'epoch': 3} {'type': 'loss', 'content': 0.015058177523314953, 'timestamp': '2025-10-01 04:34:29.504177', 'step': 17457, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:34:29.536961', 'step': 17457, 'epoch': 3} {'type': 'loss', 'content': 0.046333398669958115, 'timestamp': '2025-10-01 04:34:29.539193', 'step': 17458, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:29.571262', 'step': 17458, 'epoch': 3} {'type': 'loss', 'content': 0.043818674981594086, 'timestamp': '2025-10-01 04:34:29.573571', 'step': 17459, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:34:29.606800', 'step': 17459, 'epoch': 3} {'type': 'loss', 'content': 0.056251876056194305, 'timestamp': '2025-10-01 04:34:29.630900', 'step': 17460, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:34:29.675563', 'step': 17460, 'epoch': 3} {'type': 'loss', 'content': 0.048319097608327866, 'timestamp': '2025-10-01 04:34:29.677870', 'step': 17461, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:34:29.710603', 'step': 17461, 'epoch': 3} {'type': 'loss', 'content': 0.06790303438901901, 'timestamp': '2025-10-01 04:34:29.713103', 'step': 17462, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:29.745933', 'step': 17462, 'epoch': 3} {'type': 'loss', 'content': 0.03622070699930191, 'timestamp': '2025-10-01 04:34:29.750300', 'step': 17463, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:29.781985', 'step': 17463, 'epoch': 3} {'type': 'loss', 'content': 0.05930791795253754, 'timestamp': '2025-10-01 04:34:29.805667', 'step': 17464, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:29.844739', 'step': 17464, 'epoch': 3} {'type': 'loss', 'content': 0.07102799415588379, 'timestamp': '2025-10-01 04:34:29.846951', 'step': 17465, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:29.884968', 'step': 17465, 'epoch': 3} {'type': 'loss', 'content': 0.03543495386838913, 'timestamp': '2025-10-01 04:34:29.887255', 'step': 17466, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:34:29.920816', 'step': 17466, 'epoch': 3} {'type': 'loss', 'content': 0.0777185931801796, 'timestamp': '2025-10-01 04:34:29.923020', 'step': 17467, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:29.954984', 'step': 17467, 'epoch': 3} {'type': 'loss', 'content': 0.13454696536064148, 'timestamp': '2025-10-01 04:34:29.978764', 'step': 17468, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:30.023648', 'step': 17468, 'epoch': 3} {'type': 'loss', 'content': 0.05219745263457298, 'timestamp': '2025-10-01 04:34:30.025985', 'step': 17469, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:34:30.059516', 'step': 17469, 'epoch': 3} {'type': 'loss', 'content': 0.07202593982219696, 'timestamp': '2025-10-01 04:34:30.062038', 'step': 17470, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:30.095334', 'step': 17470, 'epoch': 3} {'type': 'loss', 'content': 0.1140316054224968, 'timestamp': '2025-10-01 04:34:30.097549', 'step': 17471, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:30.130505', 'step': 17471, 'epoch': 3} {'type': 'loss', 'content': 0.024233311414718628, 'timestamp': '2025-10-01 04:34:30.154173', 'step': 17472, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:34:30.203545', 'step': 17472, 'epoch': 3} {'type': 'loss', 'content': 0.0969635397195816, 'timestamp': '2025-10-01 04:34:30.205900', 'step': 17473, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:34:30.238130', 'step': 17473, 'epoch': 3} {'type': 'loss', 'content': 0.11076393723487854, 'timestamp': '2025-10-01 04:34:30.240822', 'step': 17474, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:30.273666', 'step': 17474, 'epoch': 3} {'type': 'loss', 'content': 0.05935855209827423, 'timestamp': '2025-10-01 04:34:30.275995', 'step': 17475, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:30.313527', 'step': 17475, 'epoch': 3} {'type': 'loss', 'content': 0.059878598898649216, 'timestamp': '2025-10-01 04:34:30.337367', 'step': 17476, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:34:30.379291', 'step': 17476, 'epoch': 3} {'type': 'loss', 'content': 0.029999876394867897, 'timestamp': '2025-10-01 04:34:30.381875', 'step': 17477, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:34:30.418264', 'step': 17477, 'epoch': 3} {'type': 'loss', 'content': 0.06284013390541077, 'timestamp': '2025-10-01 04:34:30.420874', 'step': 17478, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:30.460051', 'step': 17478, 'epoch': 3} {'type': 'loss', 'content': 0.06233721598982811, 'timestamp': '2025-10-01 04:34:30.462393', 'step': 17479, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:30.494647', 'step': 17479, 'epoch': 3} {'type': 'loss', 'content': 0.03822598606348038, 'timestamp': '2025-10-01 04:34:30.518355', 'step': 17480, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:34:30.550119', 'step': 17480, 'epoch': 3} {'type': 'loss', 'content': 0.022873446345329285, 'timestamp': '2025-10-01 04:34:30.552417', 'step': 17481, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:34:30.583657', 'step': 17481, 'epoch': 3} {'type': 'loss', 'content': 0.030327582731842995, 'timestamp': '2025-10-01 04:34:30.585941', 'step': 17482, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:30.619534', 'step': 17482, 'epoch': 3} {'type': 'loss', 'content': 0.042178817093372345, 'timestamp': '2025-10-01 04:34:30.622174', 'step': 17483, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:34:30.654227', 'step': 17483, 'epoch': 3} {'type': 'loss', 'content': 0.004689111839979887, 'timestamp': '2025-10-01 04:34:30.677991', 'step': 17484, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:34:30.709380', 'step': 17484, 'epoch': 3} {'type': 'loss', 'content': 0.06870751082897186, 'timestamp': '2025-10-01 04:34:30.711892', 'step': 17485, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:34:30.746600', 'step': 17485, 'epoch': 3} {'type': 'loss', 'content': 0.03529607877135277, 'timestamp': '2025-10-01 04:34:30.749081', 'step': 17486, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:30.780962', 'step': 17486, 'epoch': 3} {'type': 'loss', 'content': 0.060168199241161346, 'timestamp': '2025-10-01 04:34:30.790056', 'step': 17487, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:34:30.842480', 'step': 17487, 'epoch': 3} {'type': 'loss', 'content': 0.10399997979402542, 'timestamp': '2025-10-01 04:34:30.866408', 'step': 17488, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:34:30.898896', 'step': 17488, 'epoch': 3} {'type': 'loss', 'content': 0.04522019624710083, 'timestamp': '2025-10-01 04:34:30.901267', 'step': 17489, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:34:30.932603', 'step': 17489, 'epoch': 3} {'type': 'loss', 'content': 0.06900347769260406, 'timestamp': '2025-10-01 04:34:30.934955', 'step': 17490, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:34:30.967458', 'step': 17490, 'epoch': 3} {'type': 'loss', 'content': 0.07161028683185577, 'timestamp': '2025-10-01 04:34:30.970124', 'step': 17491, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:31.013426', 'step': 17491, 'epoch': 3} {'type': 'loss', 'content': 0.030398130416870117, 'timestamp': '2025-10-01 04:34:31.037411', 'step': 17492, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:31.073958', 'step': 17492, 'epoch': 3} {'type': 'loss', 'content': 0.030579378828406334, 'timestamp': '2025-10-01 04:34:31.076414', 'step': 17493, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:31.108473', 'step': 17493, 'epoch': 3} {'type': 'loss', 'content': 0.028910933062434196, 'timestamp': '2025-10-01 04:34:31.110829', 'step': 17494, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:34:31.148393', 'step': 17494, 'epoch': 3} {'type': 'loss', 'content': 0.051053766161203384, 'timestamp': '2025-10-01 04:34:31.152848', 'step': 17495, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:31.191103', 'step': 17495, 'epoch': 3} {'type': 'loss', 'content': 0.024057108908891678, 'timestamp': '2025-10-01 04:34:31.215006', 'step': 17496, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:31.252280', 'step': 17496, 'epoch': 3} {'type': 'loss', 'content': 0.03179362416267395, 'timestamp': '2025-10-01 04:34:31.254548', 'step': 17497, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:34:31.287485', 'step': 17497, 'epoch': 3} {'type': 'loss', 'content': 0.08395978063344955, 'timestamp': '2025-10-01 04:34:31.289775', 'step': 17498, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:31.323085', 'step': 17498, 'epoch': 3} {'type': 'loss', 'content': 0.07192330807447433, 'timestamp': '2025-10-01 04:34:31.325296', 'step': 17499, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:34:31.357248', 'step': 17499, 'epoch': 3} {'type': 'loss', 'content': 0.03332339599728584, 'timestamp': '2025-10-01 04:34:31.380935', 'step': 17500, 'epoch': 3} {'type': 'info', 'content': 'Checkpoint saved at step 17500', 'timestamp': '2025-10-01 04:34:37.008649', 'step': 17500, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:34:37.071082', 'step': 17500, 'epoch': 3} {'type': 'loss', 'content': 0.08239724487066269, 'timestamp': '2025-10-01 04:34:37.073851', 'step': 17501, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:34:37.110604', 'step': 17501, 'epoch': 3} {'type': 'loss', 'content': 0.06590092182159424, 'timestamp': '2025-10-01 04:34:37.113424', 'step': 17502, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:34:37.146767', 'step': 17502, 'epoch': 3} {'type': 'loss', 'content': 0.09787978231906891, 'timestamp': '2025-10-01 04:34:37.149670', 'step': 17503, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:37.181727', 'step': 17503, 'epoch': 3} {'type': 'loss', 'content': 0.0921192541718483, 'timestamp': '2025-10-01 04:34:37.206899', 'step': 17504, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:34:37.249456', 'step': 17504, 'epoch': 3} {'type': 'loss', 'content': 0.08194584399461746, 'timestamp': '2025-10-01 04:34:37.262064', 'step': 17505, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:37.294735', 'step': 17505, 'epoch': 3} {'type': 'loss', 'content': 0.04282486438751221, 'timestamp': '2025-10-01 04:34:37.297178', 'step': 17506, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:37.330591', 'step': 17506, 'epoch': 3} {'type': 'loss', 'content': 0.11298589408397675, 'timestamp': '2025-10-01 04:34:37.335314', 'step': 17507, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:37.378011', 'step': 17507, 'epoch': 3} {'type': 'loss', 'content': 0.036984436213970184, 'timestamp': '2025-10-01 04:34:37.402308', 'step': 17508, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:37.437130', 'step': 17508, 'epoch': 3} {'type': 'loss', 'content': 0.06330277025699615, 'timestamp': '2025-10-01 04:34:37.439314', 'step': 17509, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:34:37.469833', 'step': 17509, 'epoch': 3} {'type': 'loss', 'content': 0.034773197025060654, 'timestamp': '2025-10-01 04:34:37.472190', 'step': 17510, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:34:37.502847', 'step': 17510, 'epoch': 3} {'type': 'loss', 'content': 0.08542224764823914, 'timestamp': '2025-10-01 04:34:37.505243', 'step': 17511, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:37.537563', 'step': 17511, 'epoch': 3} {'type': 'loss', 'content': 0.009274131618440151, 'timestamp': '2025-10-01 04:34:37.561492', 'step': 17512, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:34:37.596398', 'step': 17512, 'epoch': 3} {'type': 'loss', 'content': 0.05481543019413948, 'timestamp': '2025-10-01 04:34:37.599648', 'step': 17513, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:37.631943', 'step': 17513, 'epoch': 3} {'type': 'loss', 'content': 0.0510244220495224, 'timestamp': '2025-10-01 04:34:37.634420', 'step': 17514, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:37.666382', 'step': 17514, 'epoch': 3} {'type': 'loss', 'content': 0.03063119761645794, 'timestamp': '2025-10-01 04:34:37.669222', 'step': 17515, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:37.701231', 'step': 17515, 'epoch': 3} {'type': 'loss', 'content': 0.01625160314142704, 'timestamp': '2025-10-01 04:34:37.724931', 'step': 17516, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:37.756203', 'step': 17516, 'epoch': 3} {'type': 'loss', 'content': 0.018890826031565666, 'timestamp': '2025-10-01 04:34:37.758513', 'step': 17517, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:37.804863', 'step': 17517, 'epoch': 3} {'type': 'loss', 'content': 0.049059003591537476, 'timestamp': '2025-10-01 04:34:37.807141', 'step': 17518, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:37.838718', 'step': 17518, 'epoch': 3} {'type': 'loss', 'content': 0.07134975492954254, 'timestamp': '2025-10-01 04:34:37.841096', 'step': 17519, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:34:37.878509', 'step': 17519, 'epoch': 3} {'type': 'loss', 'content': 0.13814404606819153, 'timestamp': '2025-10-01 04:34:37.902905', 'step': 17520, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:37.939601', 'step': 17520, 'epoch': 3} {'type': 'loss', 'content': 0.05813068896532059, 'timestamp': '2025-10-01 04:34:37.942772', 'step': 17521, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:34:37.980789', 'step': 17521, 'epoch': 3} {'type': 'loss', 'content': 0.033572617918252945, 'timestamp': '2025-10-01 04:34:37.983841', 'step': 17522, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:38.014178', 'step': 17522, 'epoch': 3} {'type': 'loss', 'content': 0.04464671388268471, 'timestamp': '2025-10-01 04:34:38.016464', 'step': 17523, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:34:38.050516', 'step': 17523, 'epoch': 3} {'type': 'loss', 'content': 0.057517968118190765, 'timestamp': '2025-10-01 04:34:38.074427', 'step': 17524, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:34:38.109249', 'step': 17524, 'epoch': 3} {'type': 'loss', 'content': 0.07142198085784912, 'timestamp': '2025-10-01 04:34:38.113029', 'step': 17525, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:34:38.146529', 'step': 17525, 'epoch': 3} {'type': 'loss', 'content': 0.05928906798362732, 'timestamp': '2025-10-01 04:34:38.148997', 'step': 17526, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:38.181620', 'step': 17526, 'epoch': 3} {'type': 'loss', 'content': 0.02454623021185398, 'timestamp': '2025-10-01 04:34:38.183900', 'step': 17527, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:38.216478', 'step': 17527, 'epoch': 3} {'type': 'loss', 'content': 0.043354567140340805, 'timestamp': '2025-10-01 04:34:38.240501', 'step': 17528, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:34:38.273958', 'step': 17528, 'epoch': 3} {'type': 'loss', 'content': 0.12083502858877182, 'timestamp': '2025-10-01 04:34:38.276189', 'step': 17529, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:38.308855', 'step': 17529, 'epoch': 3} {'type': 'loss', 'content': 0.10485665500164032, 'timestamp': '2025-10-01 04:34:38.311310', 'step': 17530, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:34:38.344855', 'step': 17530, 'epoch': 3} {'type': 'loss', 'content': 0.032007746398448944, 'timestamp': '2025-10-01 04:34:38.346939', 'step': 17531, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:34:38.378859', 'step': 17531, 'epoch': 3} {'type': 'loss', 'content': 0.027503246441483498, 'timestamp': '2025-10-01 04:34:38.402657', 'step': 17532, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:34:38.435862', 'step': 17532, 'epoch': 3} {'type': 'loss', 'content': 0.021830130368471146, 'timestamp': '2025-10-01 04:34:38.438142', 'step': 17533, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:34:38.480873', 'step': 17533, 'epoch': 3} {'type': 'loss', 'content': 0.08209147304296494, 'timestamp': '2025-10-01 04:34:38.483231', 'step': 17534, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:38.514844', 'step': 17534, 'epoch': 3} {'type': 'loss', 'content': 0.0320337675511837, 'timestamp': '2025-10-01 04:34:38.517022', 'step': 17535, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:38.549225', 'step': 17535, 'epoch': 3} {'type': 'loss', 'content': 0.061765577644109726, 'timestamp': '2025-10-01 04:34:38.572929', 'step': 17536, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:34:38.604077', 'step': 17536, 'epoch': 3} {'type': 'loss', 'content': 0.11994501203298569, 'timestamp': '2025-10-01 04:34:38.606330', 'step': 17537, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:34:38.636902', 'step': 17537, 'epoch': 3} {'type': 'loss', 'content': 0.016399698331952095, 'timestamp': '2025-10-01 04:34:38.639155', 'step': 17538, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:38.671696', 'step': 17538, 'epoch': 3} {'type': 'loss', 'content': 0.08820756524801254, 'timestamp': '2025-10-01 04:34:38.673893', 'step': 17539, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:34:38.710049', 'step': 17539, 'epoch': 3} {'type': 'loss', 'content': 0.04639190062880516, 'timestamp': '2025-10-01 04:34:38.734092', 'step': 17540, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:38.771342', 'step': 17540, 'epoch': 3} {'type': 'loss', 'content': 0.025433240458369255, 'timestamp': '2025-10-01 04:34:38.773755', 'step': 17541, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:34:38.804868', 'step': 17541, 'epoch': 3} {'type': 'loss', 'content': 0.036077965050935745, 'timestamp': '2025-10-01 04:34:38.807464', 'step': 17542, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:38.841983', 'step': 17542, 'epoch': 3} {'type': 'loss', 'content': 0.08020022511482239, 'timestamp': '2025-10-01 04:34:38.844453', 'step': 17543, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:34:38.880603', 'step': 17543, 'epoch': 3} {'type': 'loss', 'content': 0.08191587030887604, 'timestamp': '2025-10-01 04:34:38.904641', 'step': 17544, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:38.936651', 'step': 17544, 'epoch': 3} {'type': 'loss', 'content': 0.04463910311460495, 'timestamp': '2025-10-01 04:34:38.939010', 'step': 17545, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:34:38.974306', 'step': 17545, 'epoch': 3} {'type': 'loss', 'content': 0.051324356347322464, 'timestamp': '2025-10-01 04:34:38.976823', 'step': 17546, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:34:39.014942', 'step': 17546, 'epoch': 3} {'type': 'loss', 'content': 0.04845980927348137, 'timestamp': '2025-10-01 04:34:39.017483', 'step': 17547, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:39.053524', 'step': 17547, 'epoch': 3} {'type': 'loss', 'content': 0.03370186686515808, 'timestamp': '2025-10-01 04:34:39.077698', 'step': 17548, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:34:39.126265', 'step': 17548, 'epoch': 3} {'type': 'loss', 'content': 0.06525755673646927, 'timestamp': '2025-10-01 04:34:39.128630', 'step': 17549, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:34:39.160922', 'step': 17549, 'epoch': 3} {'type': 'loss', 'content': 0.040091101080179214, 'timestamp': '2025-10-01 04:34:39.163510', 'step': 17550, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:39.202013', 'step': 17550, 'epoch': 3} {'type': 'loss', 'content': 0.0767463818192482, 'timestamp': '2025-10-01 04:34:39.204368', 'step': 17551, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:34:39.244855', 'step': 17551, 'epoch': 3} {'type': 'loss', 'content': 0.06370015442371368, 'timestamp': '2025-10-01 04:34:39.268680', 'step': 17552, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:39.302732', 'step': 17552, 'epoch': 3} {'type': 'loss', 'content': 0.04484420642256737, 'timestamp': '2025-10-01 04:34:39.305152', 'step': 17553, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:34:39.354947', 'step': 17553, 'epoch': 3} {'type': 'loss', 'content': 0.07201381772756577, 'timestamp': '2025-10-01 04:34:39.357300', 'step': 17554, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:39.391321', 'step': 17554, 'epoch': 3} {'type': 'loss', 'content': 0.04597796872258186, 'timestamp': '2025-10-01 04:34:39.393619', 'step': 17555, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:39.426938', 'step': 17555, 'epoch': 3} {'type': 'loss', 'content': 0.05371776968240738, 'timestamp': '2025-10-01 04:34:39.453561', 'step': 17556, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:39.496129', 'step': 17556, 'epoch': 3} {'type': 'loss', 'content': 0.0545748732984066, 'timestamp': '2025-10-01 04:34:39.498869', 'step': 17557, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:34:39.533958', 'step': 17557, 'epoch': 3} {'type': 'loss', 'content': 0.022085431963205338, 'timestamp': '2025-10-01 04:34:39.536740', 'step': 17558, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:39.584964', 'step': 17558, 'epoch': 3} {'type': 'loss', 'content': 0.08181451261043549, 'timestamp': '2025-10-01 04:34:39.587383', 'step': 17559, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:34:39.623027', 'step': 17559, 'epoch': 3} {'type': 'loss', 'content': 0.056670717895030975, 'timestamp': '2025-10-01 04:34:39.646762', 'step': 17560, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:39.679462', 'step': 17560, 'epoch': 3} {'type': 'loss', 'content': 0.10013161599636078, 'timestamp': '2025-10-01 04:34:39.681747', 'step': 17561, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:39.714217', 'step': 17561, 'epoch': 3} {'type': 'loss', 'content': 0.11583739519119263, 'timestamp': '2025-10-01 04:34:39.716458', 'step': 17562, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:34:39.749090', 'step': 17562, 'epoch': 3} {'type': 'loss', 'content': 0.07849947363138199, 'timestamp': '2025-10-01 04:34:39.753204', 'step': 17563, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:39.786331', 'step': 17563, 'epoch': 3} {'type': 'loss', 'content': 0.03281006962060928, 'timestamp': '2025-10-01 04:34:39.816326', 'step': 17564, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:34:39.847169', 'step': 17564, 'epoch': 3} {'type': 'loss', 'content': 0.057236675173044205, 'timestamp': '2025-10-01 04:34:39.849444', 'step': 17565, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:34:39.882156', 'step': 17565, 'epoch': 3} {'type': 'loss', 'content': 0.07492967694997787, 'timestamp': '2025-10-01 04:34:39.884402', 'step': 17566, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:39.935563', 'step': 17566, 'epoch': 3} {'type': 'loss', 'content': 0.06519250571727753, 'timestamp': '2025-10-01 04:34:39.938208', 'step': 17567, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:34:39.970469', 'step': 17567, 'epoch': 3} {'type': 'loss', 'content': 0.012912807054817677, 'timestamp': '2025-10-01 04:34:39.994440', 'step': 17568, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:40.026713', 'step': 17568, 'epoch': 3} {'type': 'loss', 'content': 0.05642113834619522, 'timestamp': '2025-10-01 04:34:40.029357', 'step': 17569, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:40.061526', 'step': 17569, 'epoch': 3} {'type': 'loss', 'content': 0.0639757364988327, 'timestamp': '2025-10-01 04:34:40.064026', 'step': 17570, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:40.095396', 'step': 17570, 'epoch': 3} {'type': 'loss', 'content': 0.07030149549245834, 'timestamp': '2025-10-01 04:34:40.097722', 'step': 17571, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:34:40.130560', 'step': 17571, 'epoch': 3} {'type': 'loss', 'content': 0.015090073458850384, 'timestamp': '2025-10-01 04:34:40.154607', 'step': 17572, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:40.187276', 'step': 17572, 'epoch': 3} {'type': 'loss', 'content': 0.1096992939710617, 'timestamp': '2025-10-01 04:34:40.189732', 'step': 17573, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:40.223131', 'step': 17573, 'epoch': 3} {'type': 'loss', 'content': 0.009408463723957539, 'timestamp': '2025-10-01 04:34:40.225261', 'step': 17574, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:34:40.274440', 'step': 17574, 'epoch': 3} {'type': 'loss', 'content': 0.055338963866233826, 'timestamp': '2025-10-01 04:34:40.276878', 'step': 17575, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:40.308535', 'step': 17575, 'epoch': 3} {'type': 'loss', 'content': 0.028597118332982063, 'timestamp': '2025-10-01 04:34:40.332113', 'step': 17576, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:34:40.383754', 'step': 17576, 'epoch': 3} {'type': 'loss', 'content': 0.03498845547437668, 'timestamp': '2025-10-01 04:34:40.386070', 'step': 17577, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:40.418827', 'step': 17577, 'epoch': 3} {'type': 'loss', 'content': 0.02689295820891857, 'timestamp': '2025-10-01 04:34:40.421108', 'step': 17578, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:40.464213', 'step': 17578, 'epoch': 3} {'type': 'loss', 'content': 0.018971217796206474, 'timestamp': '2025-10-01 04:34:40.466531', 'step': 17579, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:34:40.499856', 'step': 17579, 'epoch': 3} {'type': 'loss', 'content': 0.06557808816432953, 'timestamp': '2025-10-01 04:34:40.523750', 'step': 17580, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:40.558175', 'step': 17580, 'epoch': 3} {'type': 'loss', 'content': 0.10344469547271729, 'timestamp': '2025-10-01 04:34:40.561034', 'step': 17581, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:40.596122', 'step': 17581, 'epoch': 3} {'type': 'loss', 'content': 0.04589839652180672, 'timestamp': '2025-10-01 04:34:40.598642', 'step': 17582, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:34:40.633352', 'step': 17582, 'epoch': 3} {'type': 'loss', 'content': 0.07865676283836365, 'timestamp': '2025-10-01 04:34:40.636510', 'step': 17583, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:40.668895', 'step': 17583, 'epoch': 3} {'type': 'loss', 'content': 0.12923985719680786, 'timestamp': '2025-10-01 04:34:40.692794', 'step': 17584, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:40.728803', 'step': 17584, 'epoch': 3} {'type': 'loss', 'content': 0.08870916813611984, 'timestamp': '2025-10-01 04:34:40.731166', 'step': 17585, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:40.780140', 'step': 17585, 'epoch': 3} {'type': 'loss', 'content': 0.04857078567147255, 'timestamp': '2025-10-01 04:34:40.782611', 'step': 17586, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:34:40.814724', 'step': 17586, 'epoch': 3} {'type': 'loss', 'content': 0.11258198320865631, 'timestamp': '2025-10-01 04:34:40.817312', 'step': 17587, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:34:40.857068', 'step': 17587, 'epoch': 3} {'type': 'loss', 'content': 0.06440026313066483, 'timestamp': '2025-10-01 04:34:40.880941', 'step': 17588, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:34:40.920746', 'step': 17588, 'epoch': 3} {'type': 'loss', 'content': 0.06728217005729675, 'timestamp': '2025-10-01 04:34:40.923447', 'step': 17589, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:34:40.962211', 'step': 17589, 'epoch': 3} {'type': 'loss', 'content': 0.051989711821079254, 'timestamp': '2025-10-01 04:34:40.964574', 'step': 17590, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:41.002473', 'step': 17590, 'epoch': 3} {'type': 'loss', 'content': 0.0647982582449913, 'timestamp': '2025-10-01 04:34:41.005639', 'step': 17591, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:41.043666', 'step': 17591, 'epoch': 3} {'type': 'loss', 'content': 0.0681011751294136, 'timestamp': '2025-10-01 04:34:41.067701', 'step': 17592, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:41.118280', 'step': 17592, 'epoch': 3} {'type': 'loss', 'content': 0.07906302064657211, 'timestamp': '2025-10-01 04:34:41.120569', 'step': 17593, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:41.154688', 'step': 17593, 'epoch': 3} {'type': 'loss', 'content': 0.0754358097910881, 'timestamp': '2025-10-01 04:34:41.157176', 'step': 17594, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:41.188829', 'step': 17594, 'epoch': 3} {'type': 'loss', 'content': 0.08652210980653763, 'timestamp': '2025-10-01 04:34:41.191130', 'step': 17595, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-10-01 04:34:41.227029', 'step': 17595, 'epoch': 3} {'type': 'loss', 'content': 0.019837088882923126, 'timestamp': '2025-10-01 04:34:41.252750', 'step': 17596, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:34:41.293509', 'step': 17596, 'epoch': 3} {'type': 'loss', 'content': 0.05127231031656265, 'timestamp': '2025-10-01 04:34:41.295773', 'step': 17597, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:41.336333', 'step': 17597, 'epoch': 3} {'type': 'loss', 'content': 0.06764379143714905, 'timestamp': '2025-10-01 04:34:41.339311', 'step': 17598, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:34:41.381732', 'step': 17598, 'epoch': 3} {'type': 'loss', 'content': 0.050802081823349, 'timestamp': '2025-10-01 04:34:41.384169', 'step': 17599, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:41.419594', 'step': 17599, 'epoch': 3} {'type': 'loss', 'content': 0.08132708817720413, 'timestamp': '2025-10-01 04:34:41.443291', 'step': 17600, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:41.475820', 'step': 17600, 'epoch': 3} {'type': 'loss', 'content': 0.03651079908013344, 'timestamp': '2025-10-01 04:34:41.478137', 'step': 17601, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:34:41.512988', 'step': 17601, 'epoch': 3} {'type': 'loss', 'content': 0.05986868217587471, 'timestamp': '2025-10-01 04:34:41.515584', 'step': 17602, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:41.546468', 'step': 17602, 'epoch': 3} {'type': 'loss', 'content': 0.051463741809129715, 'timestamp': '2025-10-01 04:34:41.554676', 'step': 17603, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:34:41.588738', 'step': 17603, 'epoch': 3} {'type': 'loss', 'content': 0.044325463473796844, 'timestamp': '2025-10-01 04:34:41.617004', 'step': 17604, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:41.648483', 'step': 17604, 'epoch': 3} {'type': 'loss', 'content': 0.029269719496369362, 'timestamp': '2025-10-01 04:34:41.650868', 'step': 17605, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:41.686942', 'step': 17605, 'epoch': 3} {'type': 'loss', 'content': 0.05999251455068588, 'timestamp': '2025-10-01 04:34:41.689296', 'step': 17606, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:41.722942', 'step': 17606, 'epoch': 3} {'type': 'loss', 'content': 0.05345793813467026, 'timestamp': '2025-10-01 04:34:41.725184', 'step': 17607, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:41.758329', 'step': 17607, 'epoch': 3} {'type': 'loss', 'content': 0.052632857114076614, 'timestamp': '2025-10-01 04:34:41.794899', 'step': 17608, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:41.827504', 'step': 17608, 'epoch': 3} {'type': 'loss', 'content': 0.047845322638750076, 'timestamp': '2025-10-01 04:34:41.829796', 'step': 17609, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:41.869000', 'step': 17609, 'epoch': 3} {'type': 'loss', 'content': 0.08252072334289551, 'timestamp': '2025-10-01 04:34:41.871336', 'step': 17610, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:41.907655', 'step': 17610, 'epoch': 3} {'type': 'loss', 'content': 0.0879688635468483, 'timestamp': '2025-10-01 04:34:41.910112', 'step': 17611, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:41.954450', 'step': 17611, 'epoch': 3} {'type': 'loss', 'content': 0.07742314040660858, 'timestamp': '2025-10-01 04:34:41.981377', 'step': 17612, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:42.014502', 'step': 17612, 'epoch': 3} {'type': 'loss', 'content': 0.013863944448530674, 'timestamp': '2025-10-01 04:34:42.017843', 'step': 17613, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:34:42.064725', 'step': 17613, 'epoch': 3} {'type': 'loss', 'content': 0.10491234064102173, 'timestamp': '2025-10-01 04:34:42.067658', 'step': 17614, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:42.101966', 'step': 17614, 'epoch': 3} {'type': 'loss', 'content': 0.0885903611779213, 'timestamp': '2025-10-01 04:34:42.104849', 'step': 17615, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:42.138333', 'step': 17615, 'epoch': 3} {'type': 'loss', 'content': 0.038413867354393005, 'timestamp': '2025-10-01 04:34:42.174018', 'step': 17616, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:42.204643', 'step': 17616, 'epoch': 3} {'type': 'loss', 'content': 0.033421896398067474, 'timestamp': '2025-10-01 04:34:42.207227', 'step': 17617, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:34:42.241821', 'step': 17617, 'epoch': 3} {'type': 'loss', 'content': 0.05106351524591446, 'timestamp': '2025-10-01 04:34:42.244283', 'step': 17618, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:34:42.278844', 'step': 17618, 'epoch': 3} {'type': 'loss', 'content': 0.0860571637749672, 'timestamp': '2025-10-01 04:34:42.282086', 'step': 17619, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:42.317055', 'step': 17619, 'epoch': 3} {'type': 'loss', 'content': 0.06721839308738708, 'timestamp': '2025-10-01 04:34:42.340677', 'step': 17620, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:34:42.374316', 'step': 17620, 'epoch': 3} {'type': 'loss', 'content': 0.024275943636894226, 'timestamp': '2025-10-01 04:34:42.377207', 'step': 17621, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:42.411046', 'step': 17621, 'epoch': 3} {'type': 'loss', 'content': 0.03632507100701332, 'timestamp': '2025-10-01 04:34:42.414321', 'step': 17622, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:42.456330', 'step': 17622, 'epoch': 3} {'type': 'loss', 'content': 0.09167739003896713, 'timestamp': '2025-10-01 04:34:42.459078', 'step': 17623, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:34:42.491652', 'step': 17623, 'epoch': 3} {'type': 'loss', 'content': 0.06848553568124771, 'timestamp': '2025-10-01 04:34:42.515961', 'step': 17624, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:34:42.547008', 'step': 17624, 'epoch': 3} {'type': 'loss', 'content': 0.1305209845304489, 'timestamp': '2025-10-01 04:34:42.549649', 'step': 17625, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:42.584287', 'step': 17625, 'epoch': 3} {'type': 'loss', 'content': 0.06065632775425911, 'timestamp': '2025-10-01 04:34:42.586986', 'step': 17626, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:42.618565', 'step': 17626, 'epoch': 3} {'type': 'loss', 'content': 0.10387735813856125, 'timestamp': '2025-10-01 04:34:42.620832', 'step': 17627, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:34:42.652011', 'step': 17627, 'epoch': 3} {'type': 'loss', 'content': 0.05997694283723831, 'timestamp': '2025-10-01 04:34:42.676158', 'step': 17628, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:34:42.710200', 'step': 17628, 'epoch': 3} {'type': 'loss', 'content': 0.06487440317869186, 'timestamp': '2025-10-01 04:34:42.712801', 'step': 17629, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:42.745332', 'step': 17629, 'epoch': 3} {'type': 'loss', 'content': 0.16945324838161469, 'timestamp': '2025-10-01 04:34:42.747905', 'step': 17630, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:42.781801', 'step': 17630, 'epoch': 3} {'type': 'loss', 'content': 0.08340245485305786, 'timestamp': '2025-10-01 04:34:42.784496', 'step': 17631, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:42.816954', 'step': 17631, 'epoch': 3} {'type': 'loss', 'content': 0.1237732544541359, 'timestamp': '2025-10-01 04:34:42.840605', 'step': 17632, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:34:42.877333', 'step': 17632, 'epoch': 3} {'type': 'loss', 'content': 0.12937967479228973, 'timestamp': '2025-10-01 04:34:42.880383', 'step': 17633, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:34:42.914094', 'step': 17633, 'epoch': 3} {'type': 'loss', 'content': 0.03301801532506943, 'timestamp': '2025-10-01 04:34:42.916355', 'step': 17634, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:34:42.950019', 'step': 17634, 'epoch': 3} {'type': 'loss', 'content': 0.10762687027454376, 'timestamp': '2025-10-01 04:34:42.952844', 'step': 17635, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:34:42.984133', 'step': 17635, 'epoch': 3} {'type': 'loss', 'content': 0.07453693449497223, 'timestamp': '2025-10-01 04:34:43.008535', 'step': 17636, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:43.057079', 'step': 17636, 'epoch': 3} {'type': 'loss', 'content': 0.0677124634385109, 'timestamp': '2025-10-01 04:34:43.070142', 'step': 17637, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:43.102626', 'step': 17637, 'epoch': 3} {'type': 'loss', 'content': 0.09011676162481308, 'timestamp': '2025-10-01 04:34:43.105032', 'step': 17638, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:34:43.137150', 'step': 17638, 'epoch': 3} {'type': 'loss', 'content': 0.04281981661915779, 'timestamp': '2025-10-01 04:34:43.139567', 'step': 17639, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:43.170633', 'step': 17639, 'epoch': 3} {'type': 'loss', 'content': 0.21849456429481506, 'timestamp': '2025-10-01 04:34:43.194321', 'step': 17640, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:34:43.224968', 'step': 17640, 'epoch': 3} {'type': 'loss', 'content': 0.09136463701725006, 'timestamp': '2025-10-01 04:34:43.227036', 'step': 17641, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:43.263430', 'step': 17641, 'epoch': 3} {'type': 'loss', 'content': 0.07282628864049911, 'timestamp': '2025-10-01 04:34:43.265947', 'step': 17642, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:34:43.297234', 'step': 17642, 'epoch': 3} {'type': 'loss', 'content': 0.07907196134328842, 'timestamp': '2025-10-01 04:34:43.299496', 'step': 17643, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:43.339752', 'step': 17643, 'epoch': 3} {'type': 'loss', 'content': 0.05441940203309059, 'timestamp': '2025-10-01 04:34:43.367971', 'step': 17644, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:43.413786', 'step': 17644, 'epoch': 3} {'type': 'loss', 'content': 0.09464903920888901, 'timestamp': '2025-10-01 04:34:43.416574', 'step': 17645, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:43.452915', 'step': 17645, 'epoch': 3} {'type': 'loss', 'content': 0.06161177530884743, 'timestamp': '2025-10-01 04:34:43.455454', 'step': 17646, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:34:43.494564', 'step': 17646, 'epoch': 3} {'type': 'loss', 'content': 0.09241455793380737, 'timestamp': '2025-10-01 04:34:43.500221', 'step': 17647, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:43.538628', 'step': 17647, 'epoch': 3} {'type': 'loss', 'content': 0.06107432767748833, 'timestamp': '2025-10-01 04:34:43.563106', 'step': 17648, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:43.596703', 'step': 17648, 'epoch': 3} {'type': 'loss', 'content': 0.07900295406579971, 'timestamp': '2025-10-01 04:34:43.599749', 'step': 17649, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:43.636481', 'step': 17649, 'epoch': 3} {'type': 'loss', 'content': 0.05968153104186058, 'timestamp': '2025-10-01 04:34:43.639607', 'step': 17650, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:43.674802', 'step': 17650, 'epoch': 3} {'type': 'loss', 'content': 0.22876092791557312, 'timestamp': '2025-10-01 04:34:43.677139', 'step': 17651, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:43.714724', 'step': 17651, 'epoch': 3} {'type': 'loss', 'content': 0.10142843425273895, 'timestamp': '2025-10-01 04:34:43.738389', 'step': 17652, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:34:43.774054', 'step': 17652, 'epoch': 3} {'type': 'loss', 'content': 0.10361173748970032, 'timestamp': '2025-10-01 04:34:43.776328', 'step': 17653, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:34:43.810615', 'step': 17653, 'epoch': 3} {'type': 'loss', 'content': 0.07447401434183121, 'timestamp': '2025-10-01 04:34:43.812535', 'step': 17654, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:43.848370', 'step': 17654, 'epoch': 3} {'type': 'loss', 'content': 0.16264446079730988, 'timestamp': '2025-10-01 04:34:43.850574', 'step': 17655, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:34:43.887571', 'step': 17655, 'epoch': 3} {'type': 'loss', 'content': 0.07804635167121887, 'timestamp': '2025-10-01 04:34:43.914717', 'step': 17656, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:34:43.952656', 'step': 17656, 'epoch': 3} {'type': 'loss', 'content': 0.048283107578754425, 'timestamp': '2025-10-01 04:34:43.954862', 'step': 17657, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:34:43.991583', 'step': 17657, 'epoch': 3} {'type': 'loss', 'content': 0.09915578365325928, 'timestamp': '2025-10-01 04:34:43.993932', 'step': 17658, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:34:44.025365', 'step': 17658, 'epoch': 3} {'type': 'loss', 'content': 0.0829511433839798, 'timestamp': '2025-10-01 04:34:44.027835', 'step': 17659, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:44.059797', 'step': 17659, 'epoch': 3} {'type': 'loss', 'content': 0.11265167593955994, 'timestamp': '2025-10-01 04:34:44.083313', 'step': 17660, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:34:44.125687', 'step': 17660, 'epoch': 3} {'type': 'loss', 'content': 0.09620606154203415, 'timestamp': '2025-10-01 04:34:44.127785', 'step': 17661, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:34:44.161256', 'step': 17661, 'epoch': 3} {'type': 'loss', 'content': 0.04022146388888359, 'timestamp': '2025-10-01 04:34:44.163268', 'step': 17662, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:44.201109', 'step': 17662, 'epoch': 3} {'type': 'loss', 'content': 0.04828731343150139, 'timestamp': '2025-10-01 04:34:44.203401', 'step': 17663, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:34:44.240281', 'step': 17663, 'epoch': 3} {'type': 'loss', 'content': 0.05821391940116882, 'timestamp': '2025-10-01 04:34:44.264272', 'step': 17664, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:34:44.297448', 'step': 17664, 'epoch': 3} {'type': 'loss', 'content': 0.0822417140007019, 'timestamp': '2025-10-01 04:34:44.299804', 'step': 17665, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:44.338336', 'step': 17665, 'epoch': 3} {'type': 'loss', 'content': 0.0773690789937973, 'timestamp': '2025-10-01 04:34:44.341149', 'step': 17666, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:34:44.372419', 'step': 17666, 'epoch': 3} {'type': 'loss', 'content': 0.04340917989611626, 'timestamp': '2025-10-01 04:34:44.374764', 'step': 17667, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:44.410540', 'step': 17667, 'epoch': 3} {'type': 'loss', 'content': 0.05148304998874664, 'timestamp': '2025-10-01 04:34:44.434554', 'step': 17668, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-10-01 04:34:44.465432', 'step': 17668, 'epoch': 3} {'type': 'loss', 'content': 0.0954398363828659, 'timestamp': '2025-10-01 04:34:44.470188', 'step': 17669, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:44.500830', 'step': 17669, 'epoch': 3} {'type': 'loss', 'content': 0.08665855973958969, 'timestamp': '2025-10-01 04:34:44.503102', 'step': 17670, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:44.534189', 'step': 17670, 'epoch': 3} {'type': 'loss', 'content': 0.03971487656235695, 'timestamp': '2025-10-01 04:34:44.536902', 'step': 17671, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:34:44.569469', 'step': 17671, 'epoch': 3} {'type': 'loss', 'content': 0.04974650591611862, 'timestamp': '2025-10-01 04:34:44.593040', 'step': 17672, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:34:44.625936', 'step': 17672, 'epoch': 3} {'type': 'loss', 'content': 0.014923648908734322, 'timestamp': '2025-10-01 04:34:44.628041', 'step': 17673, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:44.659256', 'step': 17673, 'epoch': 3} {'type': 'loss', 'content': 0.13976065814495087, 'timestamp': '2025-10-01 04:34:44.661256', 'step': 17674, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:34:44.693636', 'step': 17674, 'epoch': 3} {'type': 'loss', 'content': 0.12073581665754318, 'timestamp': '2025-10-01 04:34:44.695638', 'step': 17675, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:44.731462', 'step': 17675, 'epoch': 3} {'type': 'loss', 'content': 0.05747709795832634, 'timestamp': '2025-10-01 04:34:44.755068', 'step': 17676, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:34:44.790421', 'step': 17676, 'epoch': 3} {'type': 'loss', 'content': 0.06596792489290237, 'timestamp': '2025-10-01 04:34:44.793278', 'step': 17677, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:44.830365', 'step': 17677, 'epoch': 3} {'type': 'loss', 'content': 0.09276098012924194, 'timestamp': '2025-10-01 04:34:44.837041', 'step': 17678, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:34:44.873850', 'step': 17678, 'epoch': 3} {'type': 'loss', 'content': 0.0958501398563385, 'timestamp': '2025-10-01 04:34:44.876758', 'step': 17679, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:34:44.910160', 'step': 17679, 'epoch': 3} {'type': 'loss', 'content': 0.030559973791241646, 'timestamp': '2025-10-01 04:34:44.933940', 'step': 17680, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:44.965687', 'step': 17680, 'epoch': 3} {'type': 'loss', 'content': 0.1065656766295433, 'timestamp': '2025-10-01 04:34:44.967938', 'step': 17681, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:44.998407', 'step': 17681, 'epoch': 3} {'type': 'loss', 'content': 0.05682345852255821, 'timestamp': '2025-10-01 04:34:45.000584', 'step': 17682, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:34:45.031118', 'step': 17682, 'epoch': 3} {'type': 'loss', 'content': 0.07613984495401382, 'timestamp': '2025-10-01 04:34:45.033108', 'step': 17683, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:34:45.063811', 'step': 17683, 'epoch': 3} {'type': 'loss', 'content': 0.10125566273927689, 'timestamp': '2025-10-01 04:34:45.088714', 'step': 17684, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:45.119229', 'step': 17684, 'epoch': 3} {'type': 'loss', 'content': 0.11694522202014923, 'timestamp': '2025-10-01 04:34:45.121421', 'step': 17685, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:34:45.159837', 'step': 17685, 'epoch': 3} {'type': 'loss', 'content': 0.118288554251194, 'timestamp': '2025-10-01 04:34:45.162478', 'step': 17686, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:34:45.195229', 'step': 17686, 'epoch': 3} {'type': 'loss', 'content': 0.06222527474164963, 'timestamp': '2025-10-01 04:34:45.198292', 'step': 17687, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:45.229858', 'step': 17687, 'epoch': 3} {'type': 'loss', 'content': 0.01860068179666996, 'timestamp': '2025-10-01 04:34:45.253488', 'step': 17688, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:45.289858', 'step': 17688, 'epoch': 3} {'type': 'loss', 'content': 0.05478622019290924, 'timestamp': '2025-10-01 04:34:45.291836', 'step': 17689, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:45.323725', 'step': 17689, 'epoch': 3} {'type': 'loss', 'content': 0.11944594979286194, 'timestamp': '2025-10-01 04:34:45.326563', 'step': 17690, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:45.360856', 'step': 17690, 'epoch': 3} {'type': 'loss', 'content': 0.060020819306373596, 'timestamp': '2025-10-01 04:34:45.363271', 'step': 17691, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:45.395621', 'step': 17691, 'epoch': 3} {'type': 'loss', 'content': 0.1391914039850235, 'timestamp': '2025-10-01 04:34:45.419419', 'step': 17692, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:34:45.466729', 'step': 17692, 'epoch': 3} {'type': 'loss', 'content': 0.04298551753163338, 'timestamp': '2025-10-01 04:34:45.468843', 'step': 17693, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:45.502464', 'step': 17693, 'epoch': 3} {'type': 'loss', 'content': 0.12695938348770142, 'timestamp': '2025-10-01 04:34:45.504826', 'step': 17694, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:34:45.539219', 'step': 17694, 'epoch': 3} {'type': 'loss', 'content': 0.10708329826593399, 'timestamp': '2025-10-01 04:34:45.542173', 'step': 17695, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:45.577020', 'step': 17695, 'epoch': 3} {'type': 'loss', 'content': 0.049463190138339996, 'timestamp': '2025-10-01 04:34:45.600604', 'step': 17696, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:34:45.643031', 'step': 17696, 'epoch': 3} {'type': 'loss', 'content': 0.12219077348709106, 'timestamp': '2025-10-01 04:34:45.646046', 'step': 17697, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:45.697937', 'step': 17697, 'epoch': 3} {'type': 'loss', 'content': 0.08022814244031906, 'timestamp': '2025-10-01 04:34:45.700134', 'step': 17698, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:45.739325', 'step': 17698, 'epoch': 3} {'type': 'loss', 'content': 0.077714703977108, 'timestamp': '2025-10-01 04:34:45.741771', 'step': 17699, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:45.778977', 'step': 17699, 'epoch': 3} {'type': 'loss', 'content': 0.0980495736002922, 'timestamp': '2025-10-01 04:34:45.802578', 'step': 17700, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:45.833637', 'step': 17700, 'epoch': 3} {'type': 'loss', 'content': 0.048489660024642944, 'timestamp': '2025-10-01 04:34:45.836469', 'step': 17701, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:45.874800', 'step': 17701, 'epoch': 3} {'type': 'loss', 'content': 0.11065268516540527, 'timestamp': '2025-10-01 04:34:45.877050', 'step': 17702, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:45.909867', 'step': 17702, 'epoch': 3} {'type': 'loss', 'content': 0.10496176779270172, 'timestamp': '2025-10-01 04:34:45.912252', 'step': 17703, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:45.952400', 'step': 17703, 'epoch': 3} {'type': 'loss', 'content': 0.09790938347578049, 'timestamp': '2025-10-01 04:34:45.976540', 'step': 17704, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:46.008933', 'step': 17704, 'epoch': 3} {'type': 'loss', 'content': 0.0247010700404644, 'timestamp': '2025-10-01 04:34:46.011565', 'step': 17705, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:46.043532', 'step': 17705, 'epoch': 3} {'type': 'loss', 'content': 0.14431238174438477, 'timestamp': '2025-10-01 04:34:46.046466', 'step': 17706, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:46.078818', 'step': 17706, 'epoch': 3} {'type': 'loss', 'content': 0.08446062356233597, 'timestamp': '2025-10-01 04:34:46.081863', 'step': 17707, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:46.120895', 'step': 17707, 'epoch': 3} {'type': 'loss', 'content': 0.06079383194446564, 'timestamp': '2025-10-01 04:34:46.144429', 'step': 17708, 'epoch': 3} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 949202279808}], 'timestamp': '2025-10-01 04:34:55.551249', 'step': 17708, 'epoch': 3} {'type': 'pplx', 'content': 14073.371085159775, 'timestamp': '2025-10-01 04:34:55.554168', 'step': 17708, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:34:55.584208', 'step': 17708, 'epoch': 3} {'type': 'loss', 'content': 0.0379631370306015, 'timestamp': '2025-10-01 04:34:55.587113', 'step': 17709, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:55.619046', 'step': 17709, 'epoch': 3} {'type': 'loss', 'content': 0.1075536459684372, 'timestamp': '2025-10-01 04:34:55.621290', 'step': 17710, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:55.653295', 'step': 17710, 'epoch': 3} {'type': 'loss', 'content': 0.05360376089811325, 'timestamp': '2025-10-01 04:34:55.655642', 'step': 17711, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:34:55.686137', 'step': 17711, 'epoch': 3} {'type': 'loss', 'content': 0.15454630553722382, 'timestamp': '2025-10-01 04:34:55.710040', 'step': 17712, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:34:55.741335', 'step': 17712, 'epoch': 3} {'type': 'loss', 'content': 0.05437178537249565, 'timestamp': '2025-10-01 04:34:55.743485', 'step': 17713, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:34:55.774951', 'step': 17713, 'epoch': 3} {'type': 'loss', 'content': 0.10226736217737198, 'timestamp': '2025-10-01 04:34:55.777928', 'step': 17714, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:55.810071', 'step': 17714, 'epoch': 3} {'type': 'loss', 'content': 0.05453396961092949, 'timestamp': '2025-10-01 04:34:55.815663', 'step': 17715, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:55.853524', 'step': 17715, 'epoch': 3} {'type': 'loss', 'content': 0.05438341200351715, 'timestamp': '2025-10-01 04:34:55.877574', 'step': 17716, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:34:55.913077', 'step': 17716, 'epoch': 3} {'type': 'loss', 'content': 0.057113636285066605, 'timestamp': '2025-10-01 04:34:55.915298', 'step': 17717, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:34:55.948331', 'step': 17717, 'epoch': 3} {'type': 'loss', 'content': 0.10397335886955261, 'timestamp': '2025-10-01 04:34:55.952590', 'step': 17718, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:34:55.983705', 'step': 17718, 'epoch': 3} {'type': 'loss', 'content': 0.015740903094410896, 'timestamp': '2025-10-01 04:34:55.986406', 'step': 17719, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:56.018181', 'step': 17719, 'epoch': 3} {'type': 'loss', 'content': 0.08775343745946884, 'timestamp': '2025-10-01 04:34:56.041873', 'step': 17720, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:34:56.072816', 'step': 17720, 'epoch': 3} {'type': 'loss', 'content': 0.031162424013018608, 'timestamp': '2025-10-01 04:34:56.075128', 'step': 17721, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:56.106206', 'step': 17721, 'epoch': 3} {'type': 'loss', 'content': 0.09391435980796814, 'timestamp': '2025-10-01 04:34:56.108888', 'step': 17722, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:34:56.139913', 'step': 17722, 'epoch': 3} {'type': 'loss', 'content': 0.04009619355201721, 'timestamp': '2025-10-01 04:34:56.141913', 'step': 17723, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:56.173407', 'step': 17723, 'epoch': 3} {'type': 'loss', 'content': 0.11721564829349518, 'timestamp': '2025-10-01 04:34:56.197413', 'step': 17724, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:56.228840', 'step': 17724, 'epoch': 3} {'type': 'loss', 'content': 0.08360394090414047, 'timestamp': '2025-10-01 04:34:56.230969', 'step': 17725, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:56.264678', 'step': 17725, 'epoch': 3} {'type': 'loss', 'content': 0.037509411573410034, 'timestamp': '2025-10-01 04:34:56.266868', 'step': 17726, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:34:56.298241', 'step': 17726, 'epoch': 3} {'type': 'loss', 'content': 0.13612210750579834, 'timestamp': '2025-10-01 04:34:56.300861', 'step': 17727, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:56.333106', 'step': 17727, 'epoch': 3} {'type': 'loss', 'content': 0.10830004513263702, 'timestamp': '2025-10-01 04:34:56.356855', 'step': 17728, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:34:56.387270', 'step': 17728, 'epoch': 3} {'type': 'loss', 'content': 0.08048464357852936, 'timestamp': '2025-10-01 04:34:56.392456', 'step': 17729, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:56.426515', 'step': 17729, 'epoch': 3} {'type': 'loss', 'content': 0.10205346345901489, 'timestamp': '2025-10-01 04:34:56.432696', 'step': 17730, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:56.472942', 'step': 17730, 'epoch': 3} {'type': 'loss', 'content': 0.04229839891195297, 'timestamp': '2025-10-01 04:34:56.475192', 'step': 17731, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:56.506283', 'step': 17731, 'epoch': 3} {'type': 'loss', 'content': 0.016185935586690903, 'timestamp': '2025-10-01 04:34:56.530122', 'step': 17732, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:34:56.564897', 'step': 17732, 'epoch': 3} {'type': 'loss', 'content': 0.05938585847616196, 'timestamp': '2025-10-01 04:34:56.567069', 'step': 17733, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:56.597571', 'step': 17733, 'epoch': 3} {'type': 'loss', 'content': 0.05161415413022041, 'timestamp': '2025-10-01 04:34:56.599844', 'step': 17734, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:56.631318', 'step': 17734, 'epoch': 3} {'type': 'loss', 'content': 0.053001534193754196, 'timestamp': '2025-10-01 04:34:56.637359', 'step': 17735, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:56.671827', 'step': 17735, 'epoch': 3} {'type': 'loss', 'content': 0.06469598412513733, 'timestamp': '2025-10-01 04:34:56.695576', 'step': 17736, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:34:56.729797', 'step': 17736, 'epoch': 3} {'type': 'loss', 'content': 0.03870169073343277, 'timestamp': '2025-10-01 04:34:56.731868', 'step': 17737, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:56.764042', 'step': 17737, 'epoch': 3} {'type': 'loss', 'content': 0.03754405677318573, 'timestamp': '2025-10-01 04:34:56.766088', 'step': 17738, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:34:56.799764', 'step': 17738, 'epoch': 3} {'type': 'loss', 'content': 0.1462794691324234, 'timestamp': '2025-10-01 04:34:56.802259', 'step': 17739, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:56.835131', 'step': 17739, 'epoch': 3} {'type': 'loss', 'content': 0.07613228261470795, 'timestamp': '2025-10-01 04:34:56.860246', 'step': 17740, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:56.909464', 'step': 17740, 'epoch': 3} {'type': 'loss', 'content': 0.0648559108376503, 'timestamp': '2025-10-01 04:34:56.920377', 'step': 17741, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:56.968184', 'step': 17741, 'epoch': 3} {'type': 'loss', 'content': 0.08680228143930435, 'timestamp': '2025-10-01 04:34:56.970373', 'step': 17742, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:34:57.012142', 'step': 17742, 'epoch': 3} {'type': 'loss', 'content': 0.026682978495955467, 'timestamp': '2025-10-01 04:34:57.014557', 'step': 17743, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:57.045083', 'step': 17743, 'epoch': 3} {'type': 'loss', 'content': 0.03358788788318634, 'timestamp': '2025-10-01 04:34:57.073938', 'step': 17744, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:57.111102', 'step': 17744, 'epoch': 3} {'type': 'loss', 'content': 0.12911202013492584, 'timestamp': '2025-10-01 04:34:57.113460', 'step': 17745, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:34:57.153422', 'step': 17745, 'epoch': 3} {'type': 'loss', 'content': 0.04806707426905632, 'timestamp': '2025-10-01 04:34:57.164211', 'step': 17746, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:57.196398', 'step': 17746, 'epoch': 3} {'type': 'loss', 'content': 0.04964665696024895, 'timestamp': '2025-10-01 04:34:57.198753', 'step': 17747, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:34:57.232697', 'step': 17747, 'epoch': 3} {'type': 'loss', 'content': 0.01705620251595974, 'timestamp': '2025-10-01 04:34:57.256457', 'step': 17748, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:57.289111', 'step': 17748, 'epoch': 3} {'type': 'loss', 'content': 0.01653056964278221, 'timestamp': '2025-10-01 04:34:57.291632', 'step': 17749, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:57.322032', 'step': 17749, 'epoch': 3} {'type': 'loss', 'content': 0.10645762830972672, 'timestamp': '2025-10-01 04:34:57.324632', 'step': 17750, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:57.356977', 'step': 17750, 'epoch': 3} {'type': 'loss', 'content': 0.08230235427618027, 'timestamp': '2025-10-01 04:34:57.359241', 'step': 17751, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:57.394635', 'step': 17751, 'epoch': 3} {'type': 'loss', 'content': 0.04929611459374428, 'timestamp': '2025-10-01 04:34:57.422066', 'step': 17752, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:57.469209', 'step': 17752, 'epoch': 3} {'type': 'loss', 'content': 0.02288845181465149, 'timestamp': '2025-10-01 04:34:57.479257', 'step': 17753, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:57.518058', 'step': 17753, 'epoch': 3} {'type': 'loss', 'content': 0.04710359126329422, 'timestamp': '2025-10-01 04:34:57.521101', 'step': 17754, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:34:57.553763', 'step': 17754, 'epoch': 3} {'type': 'loss', 'content': 0.03658018633723259, 'timestamp': '2025-10-01 04:34:57.556260', 'step': 17755, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:57.606391', 'step': 17755, 'epoch': 3} {'type': 'loss', 'content': 0.08505073934793472, 'timestamp': '2025-10-01 04:34:57.630226', 'step': 17756, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:57.663118', 'step': 17756, 'epoch': 3} {'type': 'loss', 'content': 0.08528722077608109, 'timestamp': '2025-10-01 04:34:57.665432', 'step': 17757, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:57.706597', 'step': 17757, 'epoch': 3} {'type': 'loss', 'content': 0.10172164440155029, 'timestamp': '2025-10-01 04:34:57.709276', 'step': 17758, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:57.743434', 'step': 17758, 'epoch': 3} {'type': 'loss', 'content': 0.047492701560258865, 'timestamp': '2025-10-01 04:34:57.745665', 'step': 17759, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:57.776037', 'step': 17759, 'epoch': 3} {'type': 'loss', 'content': 0.09734363853931427, 'timestamp': '2025-10-01 04:34:57.800207', 'step': 17760, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:34:57.831631', 'step': 17760, 'epoch': 3} {'type': 'loss', 'content': 0.11620520055294037, 'timestamp': '2025-10-01 04:34:57.834526', 'step': 17761, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:57.867933', 'step': 17761, 'epoch': 3} {'type': 'loss', 'content': 0.097835473716259, 'timestamp': '2025-10-01 04:34:57.870644', 'step': 17762, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:34:57.914325', 'step': 17762, 'epoch': 3} {'type': 'loss', 'content': 0.04551555588841438, 'timestamp': '2025-10-01 04:34:57.916876', 'step': 17763, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:34:57.949016', 'step': 17763, 'epoch': 3} {'type': 'loss', 'content': 0.08458919823169708, 'timestamp': '2025-10-01 04:34:57.972599', 'step': 17764, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:58.003312', 'step': 17764, 'epoch': 3} {'type': 'loss', 'content': 0.019288498908281326, 'timestamp': '2025-10-01 04:34:58.008120', 'step': 17765, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:58.038956', 'step': 17765, 'epoch': 3} {'type': 'loss', 'content': 0.09258949756622314, 'timestamp': '2025-10-01 04:34:58.041125', 'step': 17766, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:58.071595', 'step': 17766, 'epoch': 3} {'type': 'loss', 'content': 0.05723777785897255, 'timestamp': '2025-10-01 04:34:58.073844', 'step': 17767, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:58.105285', 'step': 17767, 'epoch': 3} {'type': 'loss', 'content': 0.05159701779484749, 'timestamp': '2025-10-01 04:34:58.129116', 'step': 17768, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:58.159675', 'step': 17768, 'epoch': 3} {'type': 'loss', 'content': 0.03786195069551468, 'timestamp': '2025-10-01 04:34:58.161776', 'step': 17769, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:58.192595', 'step': 17769, 'epoch': 3} {'type': 'loss', 'content': 0.03963795304298401, 'timestamp': '2025-10-01 04:34:58.194775', 'step': 17770, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:58.224813', 'step': 17770, 'epoch': 3} {'type': 'loss', 'content': 0.05243535339832306, 'timestamp': '2025-10-01 04:34:58.227006', 'step': 17771, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:34:58.257630', 'step': 17771, 'epoch': 3} {'type': 'loss', 'content': 0.08432896435260773, 'timestamp': '2025-10-01 04:34:58.282662', 'step': 17772, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:34:58.316846', 'step': 17772, 'epoch': 3} {'type': 'loss', 'content': 0.04579468071460724, 'timestamp': '2025-10-01 04:34:58.319467', 'step': 17773, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:58.371278', 'step': 17773, 'epoch': 3} {'type': 'loss', 'content': 0.040961671620607376, 'timestamp': '2025-10-01 04:34:58.373631', 'step': 17774, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:58.405015', 'step': 17774, 'epoch': 3} {'type': 'loss', 'content': 0.07892921566963196, 'timestamp': '2025-10-01 04:34:58.407430', 'step': 17775, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:58.440761', 'step': 17775, 'epoch': 3} {'type': 'loss', 'content': 0.05729297921061516, 'timestamp': '2025-10-01 04:34:58.466031', 'step': 17776, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:34:58.498864', 'step': 17776, 'epoch': 3} {'type': 'loss', 'content': 0.05790437385439873, 'timestamp': '2025-10-01 04:34:58.501139', 'step': 17777, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:34:58.545293', 'step': 17777, 'epoch': 3} {'type': 'loss', 'content': 0.0532253235578537, 'timestamp': '2025-10-01 04:34:58.548638', 'step': 17778, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:34:58.585286', 'step': 17778, 'epoch': 3} {'type': 'loss', 'content': 0.10953666269779205, 'timestamp': '2025-10-01 04:34:58.587384', 'step': 17779, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:58.618942', 'step': 17779, 'epoch': 3} {'type': 'loss', 'content': 0.07590661942958832, 'timestamp': '2025-10-01 04:34:58.642599', 'step': 17780, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:58.676752', 'step': 17780, 'epoch': 3} {'type': 'loss', 'content': 0.06351244449615479, 'timestamp': '2025-10-01 04:34:58.678932', 'step': 17781, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:58.722604', 'step': 17781, 'epoch': 3} {'type': 'loss', 'content': 0.07311898469924927, 'timestamp': '2025-10-01 04:34:58.727681', 'step': 17782, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:58.760836', 'step': 17782, 'epoch': 3} {'type': 'loss', 'content': 0.022989260032773018, 'timestamp': '2025-10-01 04:34:58.763047', 'step': 17783, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:58.805386', 'step': 17783, 'epoch': 3} {'type': 'loss', 'content': 0.03826861083507538, 'timestamp': '2025-10-01 04:34:58.828924', 'step': 17784, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:58.866368', 'step': 17784, 'epoch': 3} {'type': 'loss', 'content': 0.04789656028151512, 'timestamp': '2025-10-01 04:34:58.868736', 'step': 17785, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:34:58.909282', 'step': 17785, 'epoch': 3} {'type': 'loss', 'content': 0.05821014940738678, 'timestamp': '2025-10-01 04:34:58.911573', 'step': 17786, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:58.942241', 'step': 17786, 'epoch': 3} {'type': 'loss', 'content': 0.04592743143439293, 'timestamp': '2025-10-01 04:34:58.944915', 'step': 17787, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:58.980202', 'step': 17787, 'epoch': 3} {'type': 'loss', 'content': 0.02997938171029091, 'timestamp': '2025-10-01 04:34:59.010633', 'step': 17788, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:34:59.041590', 'step': 17788, 'epoch': 3} {'type': 'loss', 'content': 0.07139752805233002, 'timestamp': '2025-10-01 04:34:59.043865', 'step': 17789, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:59.076995', 'step': 17789, 'epoch': 3} {'type': 'loss', 'content': 0.0675225481390953, 'timestamp': '2025-10-01 04:34:59.079088', 'step': 17790, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:59.109776', 'step': 17790, 'epoch': 3} {'type': 'loss', 'content': 0.0509510263800621, 'timestamp': '2025-10-01 04:34:59.111976', 'step': 17791, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:59.143229', 'step': 17791, 'epoch': 3} {'type': 'loss', 'content': 0.03535718098282814, 'timestamp': '2025-10-01 04:34:59.167039', 'step': 17792, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:59.197812', 'step': 17792, 'epoch': 3} {'type': 'loss', 'content': 0.08439575135707855, 'timestamp': '2025-10-01 04:34:59.199970', 'step': 17793, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:59.237807', 'step': 17793, 'epoch': 3} {'type': 'loss', 'content': 0.07696115225553513, 'timestamp': '2025-10-01 04:34:59.240511', 'step': 17794, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:34:59.271686', 'step': 17794, 'epoch': 3} {'type': 'loss', 'content': 0.036996450275182724, 'timestamp': '2025-10-01 04:34:59.274040', 'step': 17795, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:34:59.305169', 'step': 17795, 'epoch': 3} {'type': 'loss', 'content': 0.050350528210401535, 'timestamp': '2025-10-01 04:34:59.329257', 'step': 17796, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:59.364813', 'step': 17796, 'epoch': 3} {'type': 'loss', 'content': 0.15396156907081604, 'timestamp': '2025-10-01 04:34:59.367628', 'step': 17797, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:34:59.398587', 'step': 17797, 'epoch': 3} {'type': 'loss', 'content': 0.13485468924045563, 'timestamp': '2025-10-01 04:34:59.401665', 'step': 17798, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:34:59.433402', 'step': 17798, 'epoch': 3} {'type': 'loss', 'content': 0.047445494681596756, 'timestamp': '2025-10-01 04:34:59.435753', 'step': 17799, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:34:59.467265', 'step': 17799, 'epoch': 3} {'type': 'loss', 'content': 0.10937085747718811, 'timestamp': '2025-10-01 04:34:59.491320', 'step': 17800, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:34:59.523546', 'step': 17800, 'epoch': 3} {'type': 'loss', 'content': 0.07272721827030182, 'timestamp': '2025-10-01 04:34:59.527176', 'step': 17801, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:59.566592', 'step': 17801, 'epoch': 3} {'type': 'loss', 'content': 0.14759308099746704, 'timestamp': '2025-10-01 04:34:59.568814', 'step': 17802, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:34:59.599719', 'step': 17802, 'epoch': 3} {'type': 'loss', 'content': 0.09600704908370972, 'timestamp': '2025-10-01 04:34:59.602031', 'step': 17803, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:34:59.633446', 'step': 17803, 'epoch': 3} {'type': 'loss', 'content': 0.1324738711118698, 'timestamp': '2025-10-01 04:34:59.657267', 'step': 17804, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:34:59.688819', 'step': 17804, 'epoch': 3} {'type': 'loss', 'content': 0.0408015251159668, 'timestamp': '2025-10-01 04:34:59.690978', 'step': 17805, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:59.723377', 'step': 17805, 'epoch': 3} {'type': 'loss', 'content': 0.022616559639573097, 'timestamp': '2025-10-01 04:34:59.725911', 'step': 17806, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:34:59.759950', 'step': 17806, 'epoch': 3} {'type': 'loss', 'content': 0.052425567060709, 'timestamp': '2025-10-01 04:34:59.762294', 'step': 17807, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:59.794159', 'step': 17807, 'epoch': 3} {'type': 'loss', 'content': 0.04741162434220314, 'timestamp': '2025-10-01 04:34:59.817826', 'step': 17808, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:34:59.848537', 'step': 17808, 'epoch': 3} {'type': 'loss', 'content': 0.08143030852079391, 'timestamp': '2025-10-01 04:34:59.850628', 'step': 17809, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:34:59.881050', 'step': 17809, 'epoch': 3} {'type': 'loss', 'content': 0.048632510006427765, 'timestamp': '2025-10-01 04:34:59.883137', 'step': 17810, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:34:59.916803', 'step': 17810, 'epoch': 3} {'type': 'loss', 'content': 0.028416644781827927, 'timestamp': '2025-10-01 04:34:59.919036', 'step': 17811, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:34:59.952608', 'step': 17811, 'epoch': 3} {'type': 'loss', 'content': 0.03426295891404152, 'timestamp': '2025-10-01 04:34:59.976356', 'step': 17812, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:00.007471', 'step': 17812, 'epoch': 3} {'type': 'loss', 'content': 0.06095936521887779, 'timestamp': '2025-10-01 04:35:00.009684', 'step': 17813, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:00.040688', 'step': 17813, 'epoch': 3} {'type': 'loss', 'content': 0.1214948520064354, 'timestamp': '2025-10-01 04:35:00.044345', 'step': 17814, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:00.079799', 'step': 17814, 'epoch': 3} {'type': 'loss', 'content': 0.0493747740983963, 'timestamp': '2025-10-01 04:35:00.090848', 'step': 17815, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-10-01 04:35:00.127269', 'step': 17815, 'epoch': 3} {'type': 'loss', 'content': 0.11332028359174728, 'timestamp': '2025-10-01 04:35:00.152988', 'step': 17816, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:00.183338', 'step': 17816, 'epoch': 3} {'type': 'loss', 'content': 0.04920893907546997, 'timestamp': '2025-10-01 04:35:00.186026', 'step': 17817, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:00.219186', 'step': 17817, 'epoch': 3} {'type': 'loss', 'content': 0.05248434469103813, 'timestamp': '2025-10-01 04:35:00.222086', 'step': 17818, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:00.254018', 'step': 17818, 'epoch': 3} {'type': 'loss', 'content': 0.15364043414592743, 'timestamp': '2025-10-01 04:35:00.256220', 'step': 17819, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:35:00.287801', 'step': 17819, 'epoch': 3} {'type': 'loss', 'content': 0.05288799852132797, 'timestamp': '2025-10-01 04:35:00.311432', 'step': 17820, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:00.343354', 'step': 17820, 'epoch': 3} {'type': 'loss', 'content': 0.09989118576049805, 'timestamp': '2025-10-01 04:35:00.345481', 'step': 17821, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:00.378995', 'step': 17821, 'epoch': 3} {'type': 'loss', 'content': 0.04353303834795952, 'timestamp': '2025-10-01 04:35:00.381217', 'step': 17822, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:00.415930', 'step': 17822, 'epoch': 3} {'type': 'loss', 'content': 0.08712969720363617, 'timestamp': '2025-10-01 04:35:00.421583', 'step': 17823, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:35:00.457728', 'step': 17823, 'epoch': 3} {'type': 'loss', 'content': 0.04139183089137077, 'timestamp': '2025-10-01 04:35:00.481349', 'step': 17824, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:35:00.512979', 'step': 17824, 'epoch': 3} {'type': 'loss', 'content': 0.05091460421681404, 'timestamp': '2025-10-01 04:35:00.515416', 'step': 17825, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:00.547105', 'step': 17825, 'epoch': 3} {'type': 'loss', 'content': 0.17168796062469482, 'timestamp': '2025-10-01 04:35:00.549934', 'step': 17826, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:35:00.584712', 'step': 17826, 'epoch': 3} {'type': 'loss', 'content': 0.04809100180864334, 'timestamp': '2025-10-01 04:35:00.587036', 'step': 17827, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:35:00.622035', 'step': 17827, 'epoch': 3} {'type': 'loss', 'content': 0.06710126250982285, 'timestamp': '2025-10-01 04:35:00.645651', 'step': 17828, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:00.679280', 'step': 17828, 'epoch': 3} {'type': 'loss', 'content': 0.056795883923769, 'timestamp': '2025-10-01 04:35:00.681432', 'step': 17829, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:35:00.719880', 'step': 17829, 'epoch': 3} {'type': 'loss', 'content': 0.021150676533579826, 'timestamp': '2025-10-01 04:35:00.722192', 'step': 17830, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:35:00.753523', 'step': 17830, 'epoch': 3} {'type': 'loss', 'content': 0.03562368452548981, 'timestamp': '2025-10-01 04:35:00.756121', 'step': 17831, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:00.787610', 'step': 17831, 'epoch': 3} {'type': 'loss', 'content': 0.07874223589897156, 'timestamp': '2025-10-01 04:35:00.811022', 'step': 17832, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:00.842627', 'step': 17832, 'epoch': 3} {'type': 'loss', 'content': 0.09941703081130981, 'timestamp': '2025-10-01 04:35:00.844746', 'step': 17833, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:00.887894', 'step': 17833, 'epoch': 3} {'type': 'loss', 'content': 0.08121004700660706, 'timestamp': '2025-10-01 04:35:00.892576', 'step': 17834, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:35:00.927888', 'step': 17834, 'epoch': 3} {'type': 'loss', 'content': 0.07370930910110474, 'timestamp': '2025-10-01 04:35:00.930058', 'step': 17835, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:00.962025', 'step': 17835, 'epoch': 3} {'type': 'loss', 'content': 0.05884484946727753, 'timestamp': '2025-10-01 04:35:00.986064', 'step': 17836, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:35:01.021137', 'step': 17836, 'epoch': 3} {'type': 'loss', 'content': 0.059874411672353745, 'timestamp': '2025-10-01 04:35:01.023726', 'step': 17837, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:01.061684', 'step': 17837, 'epoch': 3} {'type': 'loss', 'content': 0.043425314128398895, 'timestamp': '2025-10-01 04:35:01.064315', 'step': 17838, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:01.097038', 'step': 17838, 'epoch': 3} {'type': 'loss', 'content': 0.10369553416967392, 'timestamp': '2025-10-01 04:35:01.099565', 'step': 17839, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:35:01.130736', 'step': 17839, 'epoch': 3} {'type': 'loss', 'content': 0.11654769629240036, 'timestamp': '2025-10-01 04:35:01.154447', 'step': 17840, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:01.188985', 'step': 17840, 'epoch': 3} {'type': 'loss', 'content': 0.10106576234102249, 'timestamp': '2025-10-01 04:35:01.191105', 'step': 17841, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:01.227614', 'step': 17841, 'epoch': 3} {'type': 'loss', 'content': 0.03080000728368759, 'timestamp': '2025-10-01 04:35:01.229988', 'step': 17842, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:35:01.274072', 'step': 17842, 'epoch': 3} {'type': 'loss', 'content': 0.05452330783009529, 'timestamp': '2025-10-01 04:35:01.276397', 'step': 17843, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:35:01.308998', 'step': 17843, 'epoch': 3} {'type': 'loss', 'content': 0.08257545530796051, 'timestamp': '2025-10-01 04:35:01.333003', 'step': 17844, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:35:01.369522', 'step': 17844, 'epoch': 3} {'type': 'loss', 'content': 0.0975625216960907, 'timestamp': '2025-10-01 04:35:01.372102', 'step': 17845, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:35:01.404101', 'step': 17845, 'epoch': 3} {'type': 'loss', 'content': 0.06919824331998825, 'timestamp': '2025-10-01 04:35:01.406850', 'step': 17846, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:01.438196', 'step': 17846, 'epoch': 3} {'type': 'loss', 'content': 0.027377789840102196, 'timestamp': '2025-10-01 04:35:01.440939', 'step': 17847, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:01.480508', 'step': 17847, 'epoch': 3} {'type': 'loss', 'content': 0.07692047208547592, 'timestamp': '2025-10-01 04:35:01.504472', 'step': 17848, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:01.537854', 'step': 17848, 'epoch': 3} {'type': 'loss', 'content': 0.15343046188354492, 'timestamp': '2025-10-01 04:35:01.540199', 'step': 17849, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:01.574500', 'step': 17849, 'epoch': 3} {'type': 'loss', 'content': 0.10934161394834518, 'timestamp': '2025-10-01 04:35:01.576906', 'step': 17850, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:01.609476', 'step': 17850, 'epoch': 3} {'type': 'loss', 'content': 0.06262513995170593, 'timestamp': '2025-10-01 04:35:01.612814', 'step': 17851, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:01.644362', 'step': 17851, 'epoch': 3} {'type': 'loss', 'content': 0.0742369070649147, 'timestamp': '2025-10-01 04:35:01.668566', 'step': 17852, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:01.699242', 'step': 17852, 'epoch': 3} {'type': 'loss', 'content': 0.11292767524719238, 'timestamp': '2025-10-01 04:35:01.701864', 'step': 17853, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:01.733259', 'step': 17853, 'epoch': 3} {'type': 'loss', 'content': 0.12830191850662231, 'timestamp': '2025-10-01 04:35:01.737300', 'step': 17854, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:01.769257', 'step': 17854, 'epoch': 3} {'type': 'loss', 'content': 0.06909110397100449, 'timestamp': '2025-10-01 04:35:01.774098', 'step': 17855, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-10-01 04:35:01.806468', 'step': 17855, 'epoch': 3} {'type': 'loss', 'content': 0.13609442114830017, 'timestamp': '2025-10-01 04:35:01.832204', 'step': 17856, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:35:01.864298', 'step': 17856, 'epoch': 3} {'type': 'loss', 'content': 0.15177097916603088, 'timestamp': '2025-10-01 04:35:01.866892', 'step': 17857, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:35:01.898565', 'step': 17857, 'epoch': 3} {'type': 'loss', 'content': 0.13657163083553314, 'timestamp': '2025-10-01 04:35:01.901107', 'step': 17858, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:01.934567', 'step': 17858, 'epoch': 3} {'type': 'loss', 'content': 0.10948514938354492, 'timestamp': '2025-10-01 04:35:01.936894', 'step': 17859, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:01.969302', 'step': 17859, 'epoch': 3} {'type': 'loss', 'content': 0.10738524794578552, 'timestamp': '2025-10-01 04:35:01.993516', 'step': 17860, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:02.025624', 'step': 17860, 'epoch': 3} {'type': 'loss', 'content': 0.05572850629687309, 'timestamp': '2025-10-01 04:35:02.028211', 'step': 17861, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:02.060829', 'step': 17861, 'epoch': 3} {'type': 'loss', 'content': 0.051868218928575516, 'timestamp': '2025-10-01 04:35:02.063991', 'step': 17862, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:02.097729', 'step': 17862, 'epoch': 3} {'type': 'loss', 'content': 0.04520405828952789, 'timestamp': '2025-10-01 04:35:02.100352', 'step': 17863, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:02.133710', 'step': 17863, 'epoch': 3} {'type': 'loss', 'content': 0.03645951673388481, 'timestamp': '2025-10-01 04:35:02.157849', 'step': 17864, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:02.190303', 'step': 17864, 'epoch': 3} {'type': 'loss', 'content': 0.07238610088825226, 'timestamp': '2025-10-01 04:35:02.192968', 'step': 17865, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:35:02.229026', 'step': 17865, 'epoch': 3} {'type': 'loss', 'content': 0.0602954663336277, 'timestamp': '2025-10-01 04:35:02.231518', 'step': 17866, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:35:02.264379', 'step': 17866, 'epoch': 3} {'type': 'loss', 'content': 0.07651498168706894, 'timestamp': '2025-10-01 04:35:02.266933', 'step': 17867, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:02.298581', 'step': 17867, 'epoch': 3} {'type': 'loss', 'content': 0.07957020401954651, 'timestamp': '2025-10-01 04:35:02.322839', 'step': 17868, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:02.354056', 'step': 17868, 'epoch': 3} {'type': 'loss', 'content': 0.04454042389988899, 'timestamp': '2025-10-01 04:35:02.357271', 'step': 17869, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:02.388339', 'step': 17869, 'epoch': 3} {'type': 'loss', 'content': 0.0382656566798687, 'timestamp': '2025-10-01 04:35:02.390941', 'step': 17870, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:02.422401', 'step': 17870, 'epoch': 3} {'type': 'loss', 'content': 0.034821756184101105, 'timestamp': '2025-10-01 04:35:02.424965', 'step': 17871, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:35:02.458722', 'step': 17871, 'epoch': 3} {'type': 'loss', 'content': 0.17325326800346375, 'timestamp': '2025-10-01 04:35:02.482713', 'step': 17872, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:02.514145', 'step': 17872, 'epoch': 3} {'type': 'loss', 'content': 0.08101028203964233, 'timestamp': '2025-10-01 04:35:02.517760', 'step': 17873, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:02.548598', 'step': 17873, 'epoch': 3} {'type': 'loss', 'content': 0.07380752265453339, 'timestamp': '2025-10-01 04:35:02.551911', 'step': 17874, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:35:02.584708', 'step': 17874, 'epoch': 3} {'type': 'loss', 'content': 0.04005222022533417, 'timestamp': '2025-10-01 04:35:02.587639', 'step': 17875, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:02.635654', 'step': 17875, 'epoch': 3} {'type': 'loss', 'content': 0.08438212424516678, 'timestamp': '2025-10-01 04:35:02.659957', 'step': 17876, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:02.691355', 'step': 17876, 'epoch': 3} {'type': 'loss', 'content': 0.07398415356874466, 'timestamp': '2025-10-01 04:35:02.693446', 'step': 17877, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:35:02.725645', 'step': 17877, 'epoch': 3} {'type': 'loss', 'content': 0.14204029738903046, 'timestamp': '2025-10-01 04:35:02.728285', 'step': 17878, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:02.770819', 'step': 17878, 'epoch': 3} {'type': 'loss', 'content': 0.09506906569004059, 'timestamp': '2025-10-01 04:35:02.773232', 'step': 17879, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:02.805318', 'step': 17879, 'epoch': 3} {'type': 'loss', 'content': 0.14111481606960297, 'timestamp': '2025-10-01 04:35:02.829430', 'step': 17880, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:02.861325', 'step': 17880, 'epoch': 3} {'type': 'loss', 'content': 0.07129987329244614, 'timestamp': '2025-10-01 04:35:02.863687', 'step': 17881, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:02.895828', 'step': 17881, 'epoch': 3} {'type': 'loss', 'content': 0.09489864110946655, 'timestamp': '2025-10-01 04:35:02.897966', 'step': 17882, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:35:02.929493', 'step': 17882, 'epoch': 3} {'type': 'loss', 'content': 0.15706782042980194, 'timestamp': '2025-10-01 04:35:02.931640', 'step': 17883, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:02.962692', 'step': 17883, 'epoch': 3} {'type': 'loss', 'content': 0.12171176075935364, 'timestamp': '2025-10-01 04:35:02.986451', 'step': 17884, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:03.017588', 'step': 17884, 'epoch': 3} {'type': 'loss', 'content': 0.08952163904905319, 'timestamp': '2025-10-01 04:35:03.019777', 'step': 17885, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:35:03.051564', 'step': 17885, 'epoch': 3} {'type': 'loss', 'content': 0.06958161294460297, 'timestamp': '2025-10-01 04:35:03.053783', 'step': 17886, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:03.084134', 'step': 17886, 'epoch': 3} {'type': 'loss', 'content': 0.0723111554980278, 'timestamp': '2025-10-01 04:35:03.086436', 'step': 17887, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:03.118796', 'step': 17887, 'epoch': 3} {'type': 'loss', 'content': 0.027885694056749344, 'timestamp': '2025-10-01 04:35:03.142447', 'step': 17888, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:03.173973', 'step': 17888, 'epoch': 3} {'type': 'loss', 'content': 0.094235360622406, 'timestamp': '2025-10-01 04:35:03.176126', 'step': 17889, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:03.208371', 'step': 17889, 'epoch': 3} {'type': 'loss', 'content': 0.06144826486706734, 'timestamp': '2025-10-01 04:35:03.210740', 'step': 17890, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:03.245167', 'step': 17890, 'epoch': 3} {'type': 'loss', 'content': 0.11602434515953064, 'timestamp': '2025-10-01 04:35:03.247495', 'step': 17891, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:03.280228', 'step': 17891, 'epoch': 3} {'type': 'loss', 'content': 0.14201894402503967, 'timestamp': '2025-10-01 04:35:03.303851', 'step': 17892, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:35:03.335743', 'step': 17892, 'epoch': 3} {'type': 'loss', 'content': 0.11499692499637604, 'timestamp': '2025-10-01 04:35:03.337889', 'step': 17893, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:35:03.371191', 'step': 17893, 'epoch': 3} {'type': 'loss', 'content': 0.02580409124493599, 'timestamp': '2025-10-01 04:35:03.373966', 'step': 17894, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:03.406005', 'step': 17894, 'epoch': 3} {'type': 'loss', 'content': 0.05968579277396202, 'timestamp': '2025-10-01 04:35:03.408153', 'step': 17895, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:35:03.438719', 'step': 17895, 'epoch': 3} {'type': 'loss', 'content': 0.0685441866517067, 'timestamp': '2025-10-01 04:35:03.463213', 'step': 17896, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-10-01 04:35:03.494244', 'step': 17896, 'epoch': 3} {'type': 'loss', 'content': 0.07643568515777588, 'timestamp': '2025-10-01 04:35:03.499084', 'step': 17897, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:03.537998', 'step': 17897, 'epoch': 3} {'type': 'loss', 'content': 0.05811450257897377, 'timestamp': '2025-10-01 04:35:03.540146', 'step': 17898, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:03.571010', 'step': 17898, 'epoch': 3} {'type': 'loss', 'content': 0.02891502156853676, 'timestamp': '2025-10-01 04:35:03.573094', 'step': 17899, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:35:03.602984', 'step': 17899, 'epoch': 3} {'type': 'loss', 'content': 0.08102878928184509, 'timestamp': '2025-10-01 04:35:03.626577', 'step': 17900, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:35:03.656076', 'step': 17900, 'epoch': 3} {'type': 'loss', 'content': 0.06887663155794144, 'timestamp': '2025-10-01 04:35:03.658747', 'step': 17901, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:35:03.689713', 'step': 17901, 'epoch': 3} {'type': 'loss', 'content': 0.05923347920179367, 'timestamp': '2025-10-01 04:35:03.691892', 'step': 17902, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:03.722874', 'step': 17902, 'epoch': 3} {'type': 'loss', 'content': 0.10899670422077179, 'timestamp': '2025-10-01 04:35:03.724979', 'step': 17903, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:35:03.757648', 'step': 17903, 'epoch': 3} {'type': 'loss', 'content': 0.07253398001194, 'timestamp': '2025-10-01 04:35:03.781859', 'step': 17904, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:03.814023', 'step': 17904, 'epoch': 3} {'type': 'loss', 'content': 0.04246065393090248, 'timestamp': '2025-10-01 04:35:03.816524', 'step': 17905, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:35:03.849052', 'step': 17905, 'epoch': 3} {'type': 'loss', 'content': 0.030746182426810265, 'timestamp': '2025-10-01 04:35:03.851821', 'step': 17906, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:35:03.882687', 'step': 17906, 'epoch': 3} {'type': 'loss', 'content': 0.04844648391008377, 'timestamp': '2025-10-01 04:35:03.885261', 'step': 17907, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:35:03.916333', 'step': 17907, 'epoch': 3} {'type': 'loss', 'content': 0.1056084856390953, 'timestamp': '2025-10-01 04:35:03.941726', 'step': 17908, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:03.973496', 'step': 17908, 'epoch': 3} {'type': 'loss', 'content': 0.13245394825935364, 'timestamp': '2025-10-01 04:35:03.975652', 'step': 17909, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:35:04.007249', 'step': 17909, 'epoch': 3} {'type': 'loss', 'content': 0.08459607511758804, 'timestamp': '2025-10-01 04:35:04.009867', 'step': 17910, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:35:04.041916', 'step': 17910, 'epoch': 3} {'type': 'loss', 'content': 0.08836052566766739, 'timestamp': '2025-10-01 04:35:04.044064', 'step': 17911, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:04.075194', 'step': 17911, 'epoch': 3} {'type': 'loss', 'content': 0.08526249974966049, 'timestamp': '2025-10-01 04:35:04.098839', 'step': 17912, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:04.130649', 'step': 17912, 'epoch': 3} {'type': 'loss', 'content': 0.08055280894041061, 'timestamp': '2025-10-01 04:35:04.132788', 'step': 17913, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:04.163682', 'step': 17913, 'epoch': 3} {'type': 'loss', 'content': 0.07998989522457123, 'timestamp': '2025-10-01 04:35:04.165921', 'step': 17914, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:04.196363', 'step': 17914, 'epoch': 3} {'type': 'loss', 'content': 0.09703398495912552, 'timestamp': '2025-10-01 04:35:04.198499', 'step': 17915, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:04.229159', 'step': 17915, 'epoch': 3} {'type': 'loss', 'content': 0.03661889582872391, 'timestamp': '2025-10-01 04:35:04.252814', 'step': 17916, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:04.283016', 'step': 17916, 'epoch': 3} {'type': 'loss', 'content': 0.06656394898891449, 'timestamp': '2025-10-01 04:35:04.285136', 'step': 17917, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:35:04.314759', 'step': 17917, 'epoch': 3} {'type': 'loss', 'content': 0.08419875055551529, 'timestamp': '2025-10-01 04:35:04.317449', 'step': 17918, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:35:04.347993', 'step': 17918, 'epoch': 3} {'type': 'loss', 'content': 0.07179079949855804, 'timestamp': '2025-10-01 04:35:04.352223', 'step': 17919, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:04.388871', 'step': 17919, 'epoch': 3} {'type': 'loss', 'content': 0.06111539900302887, 'timestamp': '2025-10-01 04:35:04.412957', 'step': 17920, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:04.444303', 'step': 17920, 'epoch': 3} {'type': 'loss', 'content': 0.05244467779994011, 'timestamp': '2025-10-01 04:35:04.446473', 'step': 17921, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:04.477123', 'step': 17921, 'epoch': 3} {'type': 'loss', 'content': 0.10148598998785019, 'timestamp': '2025-10-01 04:35:04.479315', 'step': 17922, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:04.510160', 'step': 17922, 'epoch': 3} {'type': 'loss', 'content': 0.0766848549246788, 'timestamp': '2025-10-01 04:35:04.512291', 'step': 17923, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:04.542439', 'step': 17923, 'epoch': 3} {'type': 'loss', 'content': 0.043215710669755936, 'timestamp': '2025-10-01 04:35:04.566080', 'step': 17924, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:04.597891', 'step': 17924, 'epoch': 3} {'type': 'loss', 'content': 0.033248260617256165, 'timestamp': '2025-10-01 04:35:04.600066', 'step': 17925, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:04.631395', 'step': 17925, 'epoch': 3} {'type': 'loss', 'content': 0.10800470411777496, 'timestamp': '2025-10-01 04:35:04.633680', 'step': 17926, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:35:04.664967', 'step': 17926, 'epoch': 3} {'type': 'loss', 'content': 0.09300858527421951, 'timestamp': '2025-10-01 04:35:04.667465', 'step': 17927, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:04.697938', 'step': 17927, 'epoch': 3} {'type': 'loss', 'content': 0.018448321148753166, 'timestamp': '2025-10-01 04:35:04.721538', 'step': 17928, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:04.752972', 'step': 17928, 'epoch': 3} {'type': 'loss', 'content': 0.09828469902276993, 'timestamp': '2025-10-01 04:35:04.755414', 'step': 17929, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:35:04.788463', 'step': 17929, 'epoch': 3} {'type': 'loss', 'content': 0.13797949254512787, 'timestamp': '2025-10-01 04:35:04.791372', 'step': 17930, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:35:04.821698', 'step': 17930, 'epoch': 3} {'type': 'loss', 'content': 0.041035477072000504, 'timestamp': '2025-10-01 04:35:04.823743', 'step': 17931, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:04.854615', 'step': 17931, 'epoch': 3} {'type': 'loss', 'content': 0.013949811458587646, 'timestamp': '2025-10-01 04:35:04.878344', 'step': 17932, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:04.911874', 'step': 17932, 'epoch': 3} {'type': 'loss', 'content': 0.044130582362413406, 'timestamp': '2025-10-01 04:35:04.914326', 'step': 17933, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:04.945275', 'step': 17933, 'epoch': 3} {'type': 'loss', 'content': 0.04024290293455124, 'timestamp': '2025-10-01 04:35:04.948050', 'step': 17934, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:04.978869', 'step': 17934, 'epoch': 3} {'type': 'loss', 'content': 0.02733355015516281, 'timestamp': '2025-10-01 04:35:04.981453', 'step': 17935, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:05.012179', 'step': 17935, 'epoch': 3} {'type': 'loss', 'content': 0.09674885869026184, 'timestamp': '2025-10-01 04:35:05.035886', 'step': 17936, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:35:05.066209', 'step': 17936, 'epoch': 3} {'type': 'loss', 'content': 0.029338635504245758, 'timestamp': '2025-10-01 04:35:05.068766', 'step': 17937, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:35:05.099261', 'step': 17937, 'epoch': 3} {'type': 'loss', 'content': 0.09513894468545914, 'timestamp': '2025-10-01 04:35:05.101784', 'step': 17938, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:05.133181', 'step': 17938, 'epoch': 3} {'type': 'loss', 'content': 0.03500084578990936, 'timestamp': '2025-10-01 04:35:05.135978', 'step': 17939, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:35:05.166423', 'step': 17939, 'epoch': 3} {'type': 'loss', 'content': 0.06309247761964798, 'timestamp': '2025-10-01 04:35:05.190394', 'step': 17940, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:05.220275', 'step': 17940, 'epoch': 3} {'type': 'loss', 'content': 0.06947771459817886, 'timestamp': '2025-10-01 04:35:05.222647', 'step': 17941, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:35:05.253520', 'step': 17941, 'epoch': 3} {'type': 'loss', 'content': 0.07094697654247284, 'timestamp': '2025-10-01 04:35:05.255979', 'step': 17942, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:05.287622', 'step': 17942, 'epoch': 3} {'type': 'loss', 'content': 0.055723827332258224, 'timestamp': '2025-10-01 04:35:05.289751', 'step': 17943, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:05.320178', 'step': 17943, 'epoch': 3} {'type': 'loss', 'content': 0.03085339069366455, 'timestamp': '2025-10-01 04:35:05.343754', 'step': 17944, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:35:05.374405', 'step': 17944, 'epoch': 3} {'type': 'loss', 'content': 0.06088314950466156, 'timestamp': '2025-10-01 04:35:05.376556', 'step': 17945, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:05.406965', 'step': 17945, 'epoch': 3} {'type': 'loss', 'content': 0.025338096544146538, 'timestamp': '2025-10-01 04:35:05.409141', 'step': 17946, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:35:05.439474', 'step': 17946, 'epoch': 3} {'type': 'loss', 'content': 0.07638195157051086, 'timestamp': '2025-10-01 04:35:05.441700', 'step': 17947, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:35:05.472614', 'step': 17947, 'epoch': 3} {'type': 'loss', 'content': 0.0995989516377449, 'timestamp': '2025-10-01 04:35:05.496348', 'step': 17948, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:05.527417', 'step': 17948, 'epoch': 3} {'type': 'loss', 'content': 0.07832171022891998, 'timestamp': '2025-10-01 04:35:05.529778', 'step': 17949, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:05.560215', 'step': 17949, 'epoch': 3} {'type': 'loss', 'content': 0.0849609300494194, 'timestamp': '2025-10-01 04:35:05.564222', 'step': 17950, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:05.597391', 'step': 17950, 'epoch': 3} {'type': 'loss', 'content': 0.0724836066365242, 'timestamp': '2025-10-01 04:35:05.599634', 'step': 17951, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:05.630066', 'step': 17951, 'epoch': 3} {'type': 'loss', 'content': 0.05088309943675995, 'timestamp': '2025-10-01 04:35:05.653706', 'step': 17952, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:35:05.684527', 'step': 17952, 'epoch': 3} {'type': 'loss', 'content': 0.10189145058393478, 'timestamp': '2025-10-01 04:35:05.688377', 'step': 17953, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:05.726136', 'step': 17953, 'epoch': 3} {'type': 'loss', 'content': 0.0598483644425869, 'timestamp': '2025-10-01 04:35:05.729551', 'step': 17954, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:35:05.760982', 'step': 17954, 'epoch': 3} {'type': 'loss', 'content': 0.06608331948518753, 'timestamp': '2025-10-01 04:35:05.763906', 'step': 17955, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:05.797251', 'step': 17955, 'epoch': 3} {'type': 'loss', 'content': 0.11848923563957214, 'timestamp': '2025-10-01 04:35:05.821107', 'step': 17956, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:05.853288', 'step': 17956, 'epoch': 3} {'type': 'loss', 'content': 0.04256059601902962, 'timestamp': '2025-10-01 04:35:05.856003', 'step': 17957, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:05.886690', 'step': 17957, 'epoch': 3} {'type': 'loss', 'content': 0.10550963878631592, 'timestamp': '2025-10-01 04:35:05.889091', 'step': 17958, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:05.919372', 'step': 17958, 'epoch': 3} {'type': 'loss', 'content': 0.03458569943904877, 'timestamp': '2025-10-01 04:35:05.922147', 'step': 17959, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:35:05.997335', 'step': 17959, 'epoch': 3} {'type': 'loss', 'content': 0.09149270504713058, 'timestamp': '2025-10-01 04:35:06.021200', 'step': 17960, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:35:06.054051', 'step': 17960, 'epoch': 3} {'type': 'loss', 'content': 0.05721427872776985, 'timestamp': '2025-10-01 04:35:06.056714', 'step': 17961, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:06.088934', 'step': 17961, 'epoch': 3} {'type': 'loss', 'content': 0.06204059720039368, 'timestamp': '2025-10-01 04:35:06.091281', 'step': 17962, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:06.122803', 'step': 17962, 'epoch': 3} {'type': 'loss', 'content': 0.1019999235868454, 'timestamp': '2025-10-01 04:35:06.125469', 'step': 17963, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:06.158385', 'step': 17963, 'epoch': 3} {'type': 'loss', 'content': 0.06548131257295609, 'timestamp': '2025-10-01 04:35:06.183929', 'step': 17964, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:35:06.216977', 'step': 17964, 'epoch': 3} {'type': 'loss', 'content': 0.1649196594953537, 'timestamp': '2025-10-01 04:35:06.219193', 'step': 17965, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:06.250758', 'step': 17965, 'epoch': 3} {'type': 'loss', 'content': 0.03888781741261482, 'timestamp': '2025-10-01 04:35:06.253381', 'step': 17966, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:35:06.284964', 'step': 17966, 'epoch': 3} {'type': 'loss', 'content': 0.06238362938165665, 'timestamp': '2025-10-01 04:35:06.287262', 'step': 17967, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:35:06.318067', 'step': 17967, 'epoch': 3} {'type': 'loss', 'content': 0.03729558736085892, 'timestamp': '2025-10-01 04:35:06.342931', 'step': 17968, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:35:06.374803', 'step': 17968, 'epoch': 3} {'type': 'loss', 'content': 0.04373880848288536, 'timestamp': '2025-10-01 04:35:06.377107', 'step': 17969, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:06.407979', 'step': 17969, 'epoch': 3} {'type': 'loss', 'content': 0.021275652572512627, 'timestamp': '2025-10-01 04:35:06.410058', 'step': 17970, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:06.440838', 'step': 17970, 'epoch': 3} {'type': 'loss', 'content': 0.11595434695482254, 'timestamp': '2025-10-01 04:35:06.443007', 'step': 17971, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:06.473500', 'step': 17971, 'epoch': 3} {'type': 'loss', 'content': 0.07397779822349548, 'timestamp': '2025-10-01 04:35:06.497428', 'step': 17972, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:06.527929', 'step': 17972, 'epoch': 3} {'type': 'loss', 'content': 0.10000762343406677, 'timestamp': '2025-10-01 04:35:06.530008', 'step': 17973, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:35:06.573321', 'step': 17973, 'epoch': 3} {'type': 'loss', 'content': 0.05250494182109833, 'timestamp': '2025-10-01 04:35:06.575528', 'step': 17974, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:06.609582', 'step': 17974, 'epoch': 3} {'type': 'loss', 'content': 0.017514748498797417, 'timestamp': '2025-10-01 04:35:06.611835', 'step': 17975, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:06.642075', 'step': 17975, 'epoch': 3} {'type': 'loss', 'content': 0.07140633463859558, 'timestamp': '2025-10-01 04:35:06.665874', 'step': 17976, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:06.697527', 'step': 17976, 'epoch': 3} {'type': 'loss', 'content': 0.03394060581922531, 'timestamp': '2025-10-01 04:35:06.699898', 'step': 17977, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:06.730264', 'step': 17977, 'epoch': 3} {'type': 'loss', 'content': 0.15519458055496216, 'timestamp': '2025-10-01 04:35:06.732408', 'step': 17978, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:35:06.763820', 'step': 17978, 'epoch': 3} {'type': 'loss', 'content': 0.09474743902683258, 'timestamp': '2025-10-01 04:35:06.765980', 'step': 17979, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:06.796789', 'step': 17979, 'epoch': 3} {'type': 'loss', 'content': 0.06119772419333458, 'timestamp': '2025-10-01 04:35:06.820471', 'step': 17980, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:35:06.850471', 'step': 17980, 'epoch': 3} {'type': 'loss', 'content': 0.012964921072125435, 'timestamp': '2025-10-01 04:35:06.852551', 'step': 17981, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:06.882991', 'step': 17981, 'epoch': 3} {'type': 'loss', 'content': 0.03715607523918152, 'timestamp': '2025-10-01 04:35:06.885236', 'step': 17982, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:06.915839', 'step': 17982, 'epoch': 3} {'type': 'loss', 'content': 0.07347425073385239, 'timestamp': '2025-10-01 04:35:06.918001', 'step': 17983, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:06.948104', 'step': 17983, 'epoch': 3} {'type': 'loss', 'content': 0.06391087174415588, 'timestamp': '2025-10-01 04:35:06.971799', 'step': 17984, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:07.008600', 'step': 17984, 'epoch': 3} {'type': 'loss', 'content': 0.03118228167295456, 'timestamp': '2025-10-01 04:35:07.013239', 'step': 17985, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:07.057015', 'step': 17985, 'epoch': 3} {'type': 'loss', 'content': 0.0512397438287735, 'timestamp': '2025-10-01 04:35:07.059097', 'step': 17986, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:35:07.090795', 'step': 17986, 'epoch': 3} {'type': 'loss', 'content': 0.04059372842311859, 'timestamp': '2025-10-01 04:35:07.092970', 'step': 17987, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:07.138478', 'step': 17987, 'epoch': 3} {'type': 'loss', 'content': 0.05035622790455818, 'timestamp': '2025-10-01 04:35:07.162083', 'step': 17988, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:07.193687', 'step': 17988, 'epoch': 3} {'type': 'loss', 'content': 0.15027405321598053, 'timestamp': '2025-10-01 04:35:07.195822', 'step': 17989, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:07.226854', 'step': 17989, 'epoch': 3} {'type': 'loss', 'content': 0.08284271508455276, 'timestamp': '2025-10-01 04:35:07.229088', 'step': 17990, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:07.259883', 'step': 17990, 'epoch': 3} {'type': 'loss', 'content': 0.08293735980987549, 'timestamp': '2025-10-01 04:35:07.262160', 'step': 17991, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:07.294441', 'step': 17991, 'epoch': 3} {'type': 'loss', 'content': 0.0736745074391365, 'timestamp': '2025-10-01 04:35:07.318179', 'step': 17992, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:07.348760', 'step': 17992, 'epoch': 3} {'type': 'loss', 'content': 0.08759793639183044, 'timestamp': '2025-10-01 04:35:07.350906', 'step': 17993, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:07.382180', 'step': 17993, 'epoch': 3} {'type': 'loss', 'content': 0.09002138674259186, 'timestamp': '2025-10-01 04:35:07.384254', 'step': 17994, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:35:07.415597', 'step': 17994, 'epoch': 3} {'type': 'loss', 'content': 0.05242254212498665, 'timestamp': '2025-10-01 04:35:07.417904', 'step': 17995, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:07.448386', 'step': 17995, 'epoch': 3} {'type': 'loss', 'content': 0.02944088540971279, 'timestamp': '2025-10-01 04:35:07.476635', 'step': 17996, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:35:07.507210', 'step': 17996, 'epoch': 3} {'type': 'loss', 'content': 0.10162058472633362, 'timestamp': '2025-10-01 04:35:07.509415', 'step': 17997, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:07.542044', 'step': 17997, 'epoch': 3} {'type': 'loss', 'content': 0.05332524701952934, 'timestamp': '2025-10-01 04:35:07.544135', 'step': 17998, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:07.587372', 'step': 17998, 'epoch': 3} {'type': 'loss', 'content': 0.05387084558606148, 'timestamp': '2025-10-01 04:35:07.589787', 'step': 17999, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:07.622912', 'step': 17999, 'epoch': 3} {'type': 'loss', 'content': 0.1610393077135086, 'timestamp': '2025-10-01 04:35:07.649221', 'step': 18000, 'epoch': 3} {'type': 'info', 'content': 'Checkpoint saved at step 18000', 'timestamp': '2025-10-01 04:35:13.232230', 'step': 18000, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:13.275695', 'step': 18000, 'epoch': 3} {'type': 'loss', 'content': 0.07885586470365524, 'timestamp': '2025-10-01 04:35:13.277831', 'step': 18001, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:13.308932', 'step': 18001, 'epoch': 3} {'type': 'loss', 'content': 0.03763958066701889, 'timestamp': '2025-10-01 04:35:13.311143', 'step': 18002, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:13.342413', 'step': 18002, 'epoch': 3} {'type': 'loss', 'content': 0.07935506105422974, 'timestamp': '2025-10-01 04:35:13.344923', 'step': 18003, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:13.375469', 'step': 18003, 'epoch': 3} {'type': 'loss', 'content': 0.04079154506325722, 'timestamp': '2025-10-01 04:35:13.399630', 'step': 18004, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:35:13.435312', 'step': 18004, 'epoch': 3} {'type': 'loss', 'content': 0.03626786917448044, 'timestamp': '2025-10-01 04:35:13.437505', 'step': 18005, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:35:13.468681', 'step': 18005, 'epoch': 3} {'type': 'loss', 'content': 0.06667061150074005, 'timestamp': '2025-10-01 04:35:13.471057', 'step': 18006, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:13.501826', 'step': 18006, 'epoch': 3} {'type': 'loss', 'content': 0.04047168791294098, 'timestamp': '2025-10-01 04:35:13.503994', 'step': 18007, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:35:13.534796', 'step': 18007, 'epoch': 3} {'type': 'loss', 'content': 0.05984741076827049, 'timestamp': '2025-10-01 04:35:13.559352', 'step': 18008, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:13.589953', 'step': 18008, 'epoch': 3} {'type': 'loss', 'content': 0.06832695752382278, 'timestamp': '2025-10-01 04:35:13.592107', 'step': 18009, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:35:13.626856', 'step': 18009, 'epoch': 3} {'type': 'loss', 'content': 0.042792513966560364, 'timestamp': '2025-10-01 04:35:13.629080', 'step': 18010, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:13.660567', 'step': 18010, 'epoch': 3} {'type': 'loss', 'content': 0.11854034662246704, 'timestamp': '2025-10-01 04:35:13.662859', 'step': 18011, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:35:13.693553', 'step': 18011, 'epoch': 3} {'type': 'loss', 'content': 0.1139533519744873, 'timestamp': '2025-10-01 04:35:13.717898', 'step': 18012, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:13.747974', 'step': 18012, 'epoch': 3} {'type': 'loss', 'content': 0.0715976282954216, 'timestamp': '2025-10-01 04:35:13.750153', 'step': 18013, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:13.780546', 'step': 18013, 'epoch': 3} {'type': 'loss', 'content': 0.0649697408080101, 'timestamp': '2025-10-01 04:35:13.782800', 'step': 18014, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:13.814271', 'step': 18014, 'epoch': 3} {'type': 'loss', 'content': 0.050428085029125214, 'timestamp': '2025-10-01 04:35:13.816528', 'step': 18015, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:13.846957', 'step': 18015, 'epoch': 3} {'type': 'loss', 'content': 0.04959399253129959, 'timestamp': '2025-10-01 04:35:13.870623', 'step': 18016, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:13.901264', 'step': 18016, 'epoch': 3} {'type': 'loss', 'content': 0.0528419129550457, 'timestamp': '2025-10-01 04:35:13.903496', 'step': 18017, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:13.934981', 'step': 18017, 'epoch': 3} {'type': 'loss', 'content': 0.035632696002721786, 'timestamp': '2025-10-01 04:35:13.937971', 'step': 18018, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:13.969169', 'step': 18018, 'epoch': 3} {'type': 'loss', 'content': 0.05095677077770233, 'timestamp': '2025-10-01 04:35:13.971421', 'step': 18019, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:35:14.003092', 'step': 18019, 'epoch': 3} {'type': 'loss', 'content': 0.05844777822494507, 'timestamp': '2025-10-01 04:35:14.026908', 'step': 18020, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:35:14.057352', 'step': 18020, 'epoch': 3} {'type': 'loss', 'content': 0.10651331394910812, 'timestamp': '2025-10-01 04:35:14.059522', 'step': 18021, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:14.090351', 'step': 18021, 'epoch': 3} {'type': 'loss', 'content': 0.030785251408815384, 'timestamp': '2025-10-01 04:35:14.092548', 'step': 18022, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:14.122941', 'step': 18022, 'epoch': 3} {'type': 'loss', 'content': 0.12795744836330414, 'timestamp': '2025-10-01 04:35:14.125370', 'step': 18023, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:14.166713', 'step': 18023, 'epoch': 3} {'type': 'loss', 'content': 0.04055703058838844, 'timestamp': '2025-10-01 04:35:14.191409', 'step': 18024, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:35:14.221648', 'step': 18024, 'epoch': 3} {'type': 'loss', 'content': 0.08170071989297867, 'timestamp': '2025-10-01 04:35:14.224282', 'step': 18025, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:35:14.255565', 'step': 18025, 'epoch': 3} {'type': 'loss', 'content': 0.07194016128778458, 'timestamp': '2025-10-01 04:35:14.258537', 'step': 18026, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:35:14.289121', 'step': 18026, 'epoch': 3} {'type': 'loss', 'content': 0.09093223512172699, 'timestamp': '2025-10-01 04:35:14.291462', 'step': 18027, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:14.322062', 'step': 18027, 'epoch': 3} {'type': 'loss', 'content': 0.0456397607922554, 'timestamp': '2025-10-01 04:35:14.346091', 'step': 18028, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:14.378868', 'step': 18028, 'epoch': 3} {'type': 'loss', 'content': 0.027874428778886795, 'timestamp': '2025-10-01 04:35:14.381063', 'step': 18029, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:14.413140', 'step': 18029, 'epoch': 3} {'type': 'loss', 'content': 0.04943162947893143, 'timestamp': '2025-10-01 04:35:14.415405', 'step': 18030, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:14.447182', 'step': 18030, 'epoch': 3} {'type': 'loss', 'content': 0.06102237477898598, 'timestamp': '2025-10-01 04:35:14.449659', 'step': 18031, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:14.480719', 'step': 18031, 'epoch': 3} {'type': 'loss', 'content': 0.11394041776657104, 'timestamp': '2025-10-01 04:35:14.504572', 'step': 18032, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:14.536336', 'step': 18032, 'epoch': 3} {'type': 'loss', 'content': 0.06754767894744873, 'timestamp': '2025-10-01 04:35:14.538565', 'step': 18033, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:35:14.569052', 'step': 18033, 'epoch': 3} {'type': 'loss', 'content': 0.07728781551122665, 'timestamp': '2025-10-01 04:35:14.571470', 'step': 18034, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:35:14.602216', 'step': 18034, 'epoch': 3} {'type': 'loss', 'content': 0.03517615422606468, 'timestamp': '2025-10-01 04:35:14.604619', 'step': 18035, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:35:14.635454', 'step': 18035, 'epoch': 3} {'type': 'loss', 'content': 0.15817642211914062, 'timestamp': '2025-10-01 04:35:14.659389', 'step': 18036, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:35:14.689696', 'step': 18036, 'epoch': 3} {'type': 'loss', 'content': 0.15322345495224, 'timestamp': '2025-10-01 04:35:14.691872', 'step': 18037, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:14.722313', 'step': 18037, 'epoch': 3} {'type': 'loss', 'content': 0.02775886096060276, 'timestamp': '2025-10-01 04:35:14.724452', 'step': 18038, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:35:14.755075', 'step': 18038, 'epoch': 3} {'type': 'loss', 'content': 0.09872446954250336, 'timestamp': '2025-10-01 04:35:14.757472', 'step': 18039, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:14.787852', 'step': 18039, 'epoch': 3} {'type': 'loss', 'content': 0.0024100064765661955, 'timestamp': '2025-10-01 04:35:14.811587', 'step': 18040, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:14.843459', 'step': 18040, 'epoch': 3} {'type': 'loss', 'content': 0.048942580819129944, 'timestamp': '2025-10-01 04:35:14.845756', 'step': 18041, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:35:14.876849', 'step': 18041, 'epoch': 3} {'type': 'loss', 'content': 0.05312011390924454, 'timestamp': '2025-10-01 04:35:14.879184', 'step': 18042, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:35:14.910853', 'step': 18042, 'epoch': 3} {'type': 'loss', 'content': 0.04349591210484505, 'timestamp': '2025-10-01 04:35:14.913197', 'step': 18043, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:35:14.943615', 'step': 18043, 'epoch': 3} {'type': 'loss', 'content': 0.0636000782251358, 'timestamp': '2025-10-01 04:35:14.968226', 'step': 18044, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:15.007528', 'step': 18044, 'epoch': 3} {'type': 'loss', 'content': 0.08508700132369995, 'timestamp': '2025-10-01 04:35:15.011092', 'step': 18045, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:15.048344', 'step': 18045, 'epoch': 3} {'type': 'loss', 'content': 0.09742658585309982, 'timestamp': '2025-10-01 04:35:15.050512', 'step': 18046, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:35:15.080873', 'step': 18046, 'epoch': 3} {'type': 'loss', 'content': 0.018313443288207054, 'timestamp': '2025-10-01 04:35:15.086566', 'step': 18047, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:35:15.117398', 'step': 18047, 'epoch': 3} {'type': 'loss', 'content': 0.047217801213264465, 'timestamp': '2025-10-01 04:35:15.142794', 'step': 18048, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:35:15.173251', 'step': 18048, 'epoch': 3} {'type': 'loss', 'content': 0.03179098293185234, 'timestamp': '2025-10-01 04:35:15.175899', 'step': 18049, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:35:15.207216', 'step': 18049, 'epoch': 3} {'type': 'loss', 'content': 0.07610096037387848, 'timestamp': '2025-10-01 04:35:15.209579', 'step': 18050, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:35:15.239725', 'step': 18050, 'epoch': 3} {'type': 'loss', 'content': 0.07500490546226501, 'timestamp': '2025-10-01 04:35:15.242067', 'step': 18051, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:15.272797', 'step': 18051, 'epoch': 3} {'type': 'loss', 'content': 0.08651920408010483, 'timestamp': '2025-10-01 04:35:15.296346', 'step': 18052, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:35:15.328405', 'step': 18052, 'epoch': 3} {'type': 'loss', 'content': 0.08786004781723022, 'timestamp': '2025-10-01 04:35:15.330870', 'step': 18053, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:15.362144', 'step': 18053, 'epoch': 3} {'type': 'loss', 'content': 0.06419035792350769, 'timestamp': '2025-10-01 04:35:15.364787', 'step': 18054, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:35:15.398452', 'step': 18054, 'epoch': 3} {'type': 'loss', 'content': 0.06742134690284729, 'timestamp': '2025-10-01 04:35:15.400666', 'step': 18055, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:35:15.431914', 'step': 18055, 'epoch': 3} {'type': 'loss', 'content': 0.1400710791349411, 'timestamp': '2025-10-01 04:35:15.456127', 'step': 18056, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:35:15.486631', 'step': 18056, 'epoch': 3} {'type': 'loss', 'content': 0.07701706141233444, 'timestamp': '2025-10-01 04:35:15.488673', 'step': 18057, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:15.519286', 'step': 18057, 'epoch': 3} {'type': 'loss', 'content': 0.07128746062517166, 'timestamp': '2025-10-01 04:35:15.521384', 'step': 18058, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:35:15.552039', 'step': 18058, 'epoch': 3} {'type': 'loss', 'content': 0.06940027326345444, 'timestamp': '2025-10-01 04:35:15.554768', 'step': 18059, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:15.585757', 'step': 18059, 'epoch': 3} {'type': 'loss', 'content': 0.1225489154458046, 'timestamp': '2025-10-01 04:35:15.609384', 'step': 18060, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-10-01 04:35:15.639440', 'step': 18060, 'epoch': 3} {'type': 'loss', 'content': 0.06626219302415848, 'timestamp': '2025-10-01 04:35:15.641763', 'step': 18061, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:15.674029', 'step': 18061, 'epoch': 3} {'type': 'loss', 'content': 0.034858524799346924, 'timestamp': '2025-10-01 04:35:15.676425', 'step': 18062, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:35:15.707078', 'step': 18062, 'epoch': 3} {'type': 'loss', 'content': 0.0804787427186966, 'timestamp': '2025-10-01 04:35:15.709344', 'step': 18063, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:15.739886', 'step': 18063, 'epoch': 3} {'type': 'loss', 'content': 0.04806621000170708, 'timestamp': '2025-10-01 04:35:15.763718', 'step': 18064, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:35:15.794797', 'step': 18064, 'epoch': 3} {'type': 'loss', 'content': 0.07661885768175125, 'timestamp': '2025-10-01 04:35:15.797165', 'step': 18065, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:15.828586', 'step': 18065, 'epoch': 3} {'type': 'loss', 'content': 0.09924034029245377, 'timestamp': '2025-10-01 04:35:15.831922', 'step': 18066, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:15.863482', 'step': 18066, 'epoch': 3} {'type': 'loss', 'content': 0.07429248094558716, 'timestamp': '2025-10-01 04:35:15.865754', 'step': 18067, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:15.896147', 'step': 18067, 'epoch': 3} {'type': 'loss', 'content': 0.06673979014158249, 'timestamp': '2025-10-01 04:35:15.919836', 'step': 18068, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:15.952807', 'step': 18068, 'epoch': 3} {'type': 'loss', 'content': 0.07923636585474014, 'timestamp': '2025-10-01 04:35:15.955143', 'step': 18069, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:15.986239', 'step': 18069, 'epoch': 3} {'type': 'loss', 'content': 0.05020340159535408, 'timestamp': '2025-10-01 04:35:15.988756', 'step': 18070, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:16.018964', 'step': 18070, 'epoch': 3} {'type': 'loss', 'content': 0.05235278606414795, 'timestamp': '2025-10-01 04:35:16.021050', 'step': 18071, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:16.051433', 'step': 18071, 'epoch': 3} {'type': 'loss', 'content': 0.04883821681141853, 'timestamp': '2025-10-01 04:35:16.075183', 'step': 18072, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:16.106445', 'step': 18072, 'epoch': 3} {'type': 'loss', 'content': 0.059409547597169876, 'timestamp': '2025-10-01 04:35:16.123056', 'step': 18073, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:16.153733', 'step': 18073, 'epoch': 3} {'type': 'loss', 'content': 0.07603462785482407, 'timestamp': '2025-10-01 04:35:16.155899', 'step': 18074, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:16.186056', 'step': 18074, 'epoch': 3} {'type': 'loss', 'content': 0.0786840096116066, 'timestamp': '2025-10-01 04:35:16.188231', 'step': 18075, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:16.218917', 'step': 18075, 'epoch': 3} {'type': 'loss', 'content': 0.06818731874227524, 'timestamp': '2025-10-01 04:35:16.242740', 'step': 18076, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:35:16.273692', 'step': 18076, 'epoch': 3} {'type': 'loss', 'content': 0.05967371165752411, 'timestamp': '2025-10-01 04:35:16.276623', 'step': 18077, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:16.307770', 'step': 18077, 'epoch': 3} {'type': 'loss', 'content': 0.07892680913209915, 'timestamp': '2025-10-01 04:35:16.310031', 'step': 18078, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:16.340881', 'step': 18078, 'epoch': 3} {'type': 'loss', 'content': 0.052692484110593796, 'timestamp': '2025-10-01 04:35:16.343039', 'step': 18079, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:16.373531', 'step': 18079, 'epoch': 3} {'type': 'loss', 'content': 0.12508060038089752, 'timestamp': '2025-10-01 04:35:16.397374', 'step': 18080, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:35:16.428779', 'step': 18080, 'epoch': 3} {'type': 'loss', 'content': 0.08449956774711609, 'timestamp': '2025-10-01 04:35:16.431470', 'step': 18081, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:16.467192', 'step': 18081, 'epoch': 3} {'type': 'loss', 'content': 0.09094339609146118, 'timestamp': '2025-10-01 04:35:16.470167', 'step': 18082, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:16.504707', 'step': 18082, 'epoch': 3} {'type': 'loss', 'content': 0.10666336119174957, 'timestamp': '2025-10-01 04:35:16.507719', 'step': 18083, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:35:16.538129', 'step': 18083, 'epoch': 3} {'type': 'loss', 'content': 0.07895248383283615, 'timestamp': '2025-10-01 04:35:16.562417', 'step': 18084, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:35:16.592827', 'step': 18084, 'epoch': 3} {'type': 'loss', 'content': 0.06539801508188248, 'timestamp': '2025-10-01 04:35:16.594915', 'step': 18085, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:16.625459', 'step': 18085, 'epoch': 3} {'type': 'loss', 'content': 0.06540308892726898, 'timestamp': '2025-10-01 04:35:16.627858', 'step': 18086, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:16.658234', 'step': 18086, 'epoch': 3} {'type': 'loss', 'content': 0.1353999525308609, 'timestamp': '2025-10-01 04:35:16.663969', 'step': 18087, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:35:16.702724', 'step': 18087, 'epoch': 3} {'type': 'loss', 'content': 0.13280293345451355, 'timestamp': '2025-10-01 04:35:16.727538', 'step': 18088, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:35:16.757610', 'step': 18088, 'epoch': 3} {'type': 'loss', 'content': 0.0521865114569664, 'timestamp': '2025-10-01 04:35:16.760235', 'step': 18089, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:16.790708', 'step': 18089, 'epoch': 3} {'type': 'loss', 'content': 0.05444668233394623, 'timestamp': '2025-10-01 04:35:16.793053', 'step': 18090, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:16.823903', 'step': 18090, 'epoch': 3} {'type': 'loss', 'content': 0.04654831066727638, 'timestamp': '2025-10-01 04:35:16.826494', 'step': 18091, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:35:16.857068', 'step': 18091, 'epoch': 3} {'type': 'loss', 'content': 0.07133762538433075, 'timestamp': '2025-10-01 04:35:16.880855', 'step': 18092, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:16.911097', 'step': 18092, 'epoch': 3} {'type': 'loss', 'content': 0.08991839736700058, 'timestamp': '2025-10-01 04:35:16.913253', 'step': 18093, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:16.944172', 'step': 18093, 'epoch': 3} {'type': 'loss', 'content': 0.10676754266023636, 'timestamp': '2025-10-01 04:35:16.946374', 'step': 18094, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:35:16.978984', 'step': 18094, 'epoch': 3} {'type': 'loss', 'content': 0.09904184937477112, 'timestamp': '2025-10-01 04:35:16.981633', 'step': 18095, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:17.013810', 'step': 18095, 'epoch': 3} {'type': 'loss', 'content': 0.07210822403430939, 'timestamp': '2025-10-01 04:35:17.037711', 'step': 18096, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:17.068218', 'step': 18096, 'epoch': 3} {'type': 'loss', 'content': 0.06461068242788315, 'timestamp': '2025-10-01 04:35:17.070824', 'step': 18097, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:17.111409', 'step': 18097, 'epoch': 3} {'type': 'loss', 'content': 0.099603570997715, 'timestamp': '2025-10-01 04:35:17.113989', 'step': 18098, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:17.151687', 'step': 18098, 'epoch': 3} {'type': 'loss', 'content': 0.06976046413183212, 'timestamp': '2025-10-01 04:35:17.154024', 'step': 18099, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:17.184594', 'step': 18099, 'epoch': 3} {'type': 'loss', 'content': 0.0362103171646595, 'timestamp': '2025-10-01 04:35:17.208941', 'step': 18100, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:35:17.239540', 'step': 18100, 'epoch': 3} {'type': 'loss', 'content': 0.050078053027391434, 'timestamp': '2025-10-01 04:35:17.242490', 'step': 18101, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:17.273424', 'step': 18101, 'epoch': 3} {'type': 'loss', 'content': 0.012331640347838402, 'timestamp': '2025-10-01 04:35:17.275754', 'step': 18102, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:17.310355', 'step': 18102, 'epoch': 3} {'type': 'loss', 'content': 0.04172982648015022, 'timestamp': '2025-10-01 04:35:17.312484', 'step': 18103, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:17.343386', 'step': 18103, 'epoch': 3} {'type': 'loss', 'content': 0.06617353111505508, 'timestamp': '2025-10-01 04:35:17.367069', 'step': 18104, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:35:17.399065', 'step': 18104, 'epoch': 3} {'type': 'loss', 'content': 0.07298443466424942, 'timestamp': '2025-10-01 04:35:17.401969', 'step': 18105, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:17.433450', 'step': 18105, 'epoch': 3} {'type': 'loss', 'content': 0.034738872200250626, 'timestamp': '2025-10-01 04:35:17.442800', 'step': 18106, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:17.481533', 'step': 18106, 'epoch': 3} {'type': 'loss', 'content': 0.04744194820523262, 'timestamp': '2025-10-01 04:35:17.483969', 'step': 18107, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:35:17.516951', 'step': 18107, 'epoch': 3} {'type': 'loss', 'content': 0.056869786232709885, 'timestamp': '2025-10-01 04:35:17.541058', 'step': 18108, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:17.571611', 'step': 18108, 'epoch': 3} {'type': 'loss', 'content': 0.04969966411590576, 'timestamp': '2025-10-01 04:35:17.574046', 'step': 18109, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:17.604370', 'step': 18109, 'epoch': 3} {'type': 'loss', 'content': 0.020667515695095062, 'timestamp': '2025-10-01 04:35:17.612497', 'step': 18110, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:17.652116', 'step': 18110, 'epoch': 3} {'type': 'loss', 'content': 0.034787457436323166, 'timestamp': '2025-10-01 04:35:17.654153', 'step': 18111, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:17.685199', 'step': 18111, 'epoch': 3} {'type': 'loss', 'content': 0.01958821900188923, 'timestamp': '2025-10-01 04:35:17.709228', 'step': 18112, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:35:17.740387', 'step': 18112, 'epoch': 3} {'type': 'loss', 'content': 0.050358619540929794, 'timestamp': '2025-10-01 04:35:17.742580', 'step': 18113, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:35:17.776030', 'step': 18113, 'epoch': 3} {'type': 'loss', 'content': 0.07708635926246643, 'timestamp': '2025-10-01 04:35:17.778200', 'step': 18114, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:17.808826', 'step': 18114, 'epoch': 3} {'type': 'loss', 'content': 0.06341146677732468, 'timestamp': '2025-10-01 04:35:17.810996', 'step': 18115, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:17.846107', 'step': 18115, 'epoch': 3} {'type': 'loss', 'content': 0.058192335069179535, 'timestamp': '2025-10-01 04:35:17.873494', 'step': 18116, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:17.908834', 'step': 18116, 'epoch': 3} {'type': 'loss', 'content': 0.047606758773326874, 'timestamp': '2025-10-01 04:35:17.912623', 'step': 18117, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:35:17.942920', 'step': 18117, 'epoch': 3} {'type': 'loss', 'content': 0.042234208434820175, 'timestamp': '2025-10-01 04:35:17.945151', 'step': 18118, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:17.979483', 'step': 18118, 'epoch': 3} {'type': 'loss', 'content': 0.07560675591230392, 'timestamp': '2025-10-01 04:35:17.981729', 'step': 18119, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:18.012330', 'step': 18119, 'epoch': 3} {'type': 'loss', 'content': 0.0303750429302454, 'timestamp': '2025-10-01 04:35:18.036172', 'step': 18120, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:18.066843', 'step': 18120, 'epoch': 3} {'type': 'loss', 'content': 0.05763203650712967, 'timestamp': '2025-10-01 04:35:18.069875', 'step': 18121, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:18.104196', 'step': 18121, 'epoch': 3} {'type': 'loss', 'content': 0.10337752103805542, 'timestamp': '2025-10-01 04:35:18.106399', 'step': 18122, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:18.137043', 'step': 18122, 'epoch': 3} {'type': 'loss', 'content': 0.08748925477266312, 'timestamp': '2025-10-01 04:35:18.139949', 'step': 18123, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:18.170552', 'step': 18123, 'epoch': 3} {'type': 'loss', 'content': 0.12063772976398468, 'timestamp': '2025-10-01 04:35:18.194427', 'step': 18124, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:18.228037', 'step': 18124, 'epoch': 3} {'type': 'loss', 'content': 0.026808414608240128, 'timestamp': '2025-10-01 04:35:18.230231', 'step': 18125, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:18.261890', 'step': 18125, 'epoch': 3} {'type': 'loss', 'content': 0.0693783089518547, 'timestamp': '2025-10-01 04:35:18.264069', 'step': 18126, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:35:18.295105', 'step': 18126, 'epoch': 3} {'type': 'loss', 'content': 0.1029866561293602, 'timestamp': '2025-10-01 04:35:18.305391', 'step': 18127, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:35:18.336430', 'step': 18127, 'epoch': 3} {'type': 'loss', 'content': 0.05563690885901451, 'timestamp': '2025-10-01 04:35:18.360279', 'step': 18128, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:18.391753', 'step': 18128, 'epoch': 3} {'type': 'loss', 'content': 0.032225970178842545, 'timestamp': '2025-10-01 04:35:18.400168', 'step': 18129, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:18.432185', 'step': 18129, 'epoch': 3} {'type': 'loss', 'content': 0.12078268080949783, 'timestamp': '2025-10-01 04:35:18.434717', 'step': 18130, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:18.466273', 'step': 18130, 'epoch': 3} {'type': 'loss', 'content': 0.07817292213439941, 'timestamp': '2025-10-01 04:35:18.472507', 'step': 18131, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:35:18.504525', 'step': 18131, 'epoch': 3} {'type': 'loss', 'content': 0.01787143386900425, 'timestamp': '2025-10-01 04:35:18.528433', 'step': 18132, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:18.560153', 'step': 18132, 'epoch': 3} {'type': 'loss', 'content': 0.11933394521474838, 'timestamp': '2025-10-01 04:35:18.563272', 'step': 18133, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:18.594343', 'step': 18133, 'epoch': 3} {'type': 'loss', 'content': 0.06582508981227875, 'timestamp': '2025-10-01 04:35:18.597643', 'step': 18134, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:18.628519', 'step': 18134, 'epoch': 3} {'type': 'loss', 'content': 0.09693414717912674, 'timestamp': '2025-10-01 04:35:18.631158', 'step': 18135, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:18.662319', 'step': 18135, 'epoch': 3} {'type': 'loss', 'content': 0.09413468092679977, 'timestamp': '2025-10-01 04:35:18.686015', 'step': 18136, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:18.717686', 'step': 18136, 'epoch': 3} {'type': 'loss', 'content': 0.11778848618268967, 'timestamp': '2025-10-01 04:35:18.720205', 'step': 18137, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:35:18.756655', 'step': 18137, 'epoch': 3} {'type': 'loss', 'content': 0.053705938160419464, 'timestamp': '2025-10-01 04:35:18.765057', 'step': 18138, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:18.795369', 'step': 18138, 'epoch': 3} {'type': 'loss', 'content': 0.07431697100400925, 'timestamp': '2025-10-01 04:35:18.798067', 'step': 18139, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:35:18.830467', 'step': 18139, 'epoch': 3} {'type': 'loss', 'content': 0.06200534105300903, 'timestamp': '2025-10-01 04:35:18.854793', 'step': 18140, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:35:18.888370', 'step': 18140, 'epoch': 3} {'type': 'loss', 'content': 0.1069132387638092, 'timestamp': '2025-10-01 04:35:18.890882', 'step': 18141, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:18.928840', 'step': 18141, 'epoch': 3} {'type': 'loss', 'content': 0.1096271425485611, 'timestamp': '2025-10-01 04:35:18.931457', 'step': 18142, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:35:18.962306', 'step': 18142, 'epoch': 3} {'type': 'loss', 'content': 0.11814257502555847, 'timestamp': '2025-10-01 04:35:18.964855', 'step': 18143, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-10-01 04:35:18.999025', 'step': 18143, 'epoch': 3} {'type': 'loss', 'content': 0.12199317663908005, 'timestamp': '2025-10-01 04:35:19.027754', 'step': 18144, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:35:19.061952', 'step': 18144, 'epoch': 3} {'type': 'loss', 'content': 0.0516899898648262, 'timestamp': '2025-10-01 04:35:19.064512', 'step': 18145, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:19.101058', 'step': 18145, 'epoch': 3} {'type': 'loss', 'content': 0.0524958111345768, 'timestamp': '2025-10-01 04:35:19.108247', 'step': 18146, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:19.139987', 'step': 18146, 'epoch': 3} {'type': 'loss', 'content': 0.10202397406101227, 'timestamp': '2025-10-01 04:35:19.142281', 'step': 18147, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:19.175844', 'step': 18147, 'epoch': 3} {'type': 'loss', 'content': 0.1139659658074379, 'timestamp': '2025-10-01 04:35:19.199803', 'step': 18148, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:35:19.231636', 'step': 18148, 'epoch': 3} {'type': 'loss', 'content': 0.038535721600055695, 'timestamp': '2025-10-01 04:35:19.234054', 'step': 18149, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:19.265663', 'step': 18149, 'epoch': 3} {'type': 'loss', 'content': 0.0603901632130146, 'timestamp': '2025-10-01 04:35:19.268520', 'step': 18150, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:35:19.300391', 'step': 18150, 'epoch': 3} {'type': 'loss', 'content': 0.06605145335197449, 'timestamp': '2025-10-01 04:35:19.304821', 'step': 18151, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:35:19.335237', 'step': 18151, 'epoch': 3} {'type': 'loss', 'content': 0.016400177031755447, 'timestamp': '2025-10-01 04:35:19.359344', 'step': 18152, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:35:19.392135', 'step': 18152, 'epoch': 3} {'type': 'loss', 'content': 0.034616198390722275, 'timestamp': '2025-10-01 04:35:19.395127', 'step': 18153, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:19.433850', 'step': 18153, 'epoch': 3} {'type': 'loss', 'content': 0.04387844726443291, 'timestamp': '2025-10-01 04:35:19.436330', 'step': 18154, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:35:19.469541', 'step': 18154, 'epoch': 3} {'type': 'loss', 'content': 0.19545136392116547, 'timestamp': '2025-10-01 04:35:19.472266', 'step': 18155, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:19.505330', 'step': 18155, 'epoch': 3} {'type': 'loss', 'content': 0.026547539979219437, 'timestamp': '2025-10-01 04:35:19.529495', 'step': 18156, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:35:19.563189', 'step': 18156, 'epoch': 3} {'type': 'loss', 'content': 0.09595859795808792, 'timestamp': '2025-10-01 04:35:19.566311', 'step': 18157, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:19.597768', 'step': 18157, 'epoch': 3} {'type': 'loss', 'content': 0.09721852093935013, 'timestamp': '2025-10-01 04:35:19.600401', 'step': 18158, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:35:19.633501', 'step': 18158, 'epoch': 3} {'type': 'loss', 'content': 0.07385565340518951, 'timestamp': '2025-10-01 04:35:19.636186', 'step': 18159, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:19.669186', 'step': 18159, 'epoch': 3} {'type': 'loss', 'content': 0.07279215008020401, 'timestamp': '2025-10-01 04:35:19.693244', 'step': 18160, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:19.724076', 'step': 18160, 'epoch': 3} {'type': 'loss', 'content': 0.032699503004550934, 'timestamp': '2025-10-01 04:35:19.726231', 'step': 18161, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:19.757629', 'step': 18161, 'epoch': 3} {'type': 'loss', 'content': 0.14523003995418549, 'timestamp': '2025-10-01 04:35:19.760376', 'step': 18162, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:19.792504', 'step': 18162, 'epoch': 3} {'type': 'loss', 'content': 0.012028724886476994, 'timestamp': '2025-10-01 04:35:19.795234', 'step': 18163, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:19.829557', 'step': 18163, 'epoch': 3} {'type': 'loss', 'content': 0.10905168950557709, 'timestamp': '2025-10-01 04:35:19.853605', 'step': 18164, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:19.888578', 'step': 18164, 'epoch': 3} {'type': 'loss', 'content': 0.04344380646944046, 'timestamp': '2025-10-01 04:35:19.891083', 'step': 18165, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:19.924725', 'step': 18165, 'epoch': 3} {'type': 'loss', 'content': 0.0516817532479763, 'timestamp': '2025-10-01 04:35:19.927108', 'step': 18166, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:19.966506', 'step': 18166, 'epoch': 3} {'type': 'loss', 'content': 0.02142231911420822, 'timestamp': '2025-10-01 04:35:19.969203', 'step': 18167, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:35:20.001404', 'step': 18167, 'epoch': 3} {'type': 'loss', 'content': 0.1388004571199417, 'timestamp': '2025-10-01 04:35:20.025533', 'step': 18168, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:20.055386', 'step': 18168, 'epoch': 3} {'type': 'loss', 'content': 0.09633073955774307, 'timestamp': '2025-10-01 04:35:20.057946', 'step': 18169, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:20.088744', 'step': 18169, 'epoch': 3} {'type': 'loss', 'content': 0.039855677634477615, 'timestamp': '2025-10-01 04:35:20.091088', 'step': 18170, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:20.121825', 'step': 18170, 'epoch': 3} {'type': 'loss', 'content': 0.04279109463095665, 'timestamp': '2025-10-01 04:35:20.124535', 'step': 18171, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:35:20.155934', 'step': 18171, 'epoch': 3} {'type': 'loss', 'content': 0.0728047788143158, 'timestamp': '2025-10-01 04:35:20.180473', 'step': 18172, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:35:20.212045', 'step': 18172, 'epoch': 3} {'type': 'loss', 'content': 0.05120188742876053, 'timestamp': '2025-10-01 04:35:20.214230', 'step': 18173, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:20.245058', 'step': 18173, 'epoch': 3} {'type': 'loss', 'content': 0.07065985351800919, 'timestamp': '2025-10-01 04:35:20.247742', 'step': 18174, 'epoch': 3} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 949202279808}], 'timestamp': '2025-10-01 04:35:29.294475', 'step': 18174, 'epoch': 3} {'type': 'pplx', 'content': 14113.113972332216, 'timestamp': '2025-10-01 04:35:29.298427', 'step': 18174, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:29.328650', 'step': 18174, 'epoch': 3} {'type': 'loss', 'content': 0.06781824678182602, 'timestamp': '2025-10-01 04:35:29.331288', 'step': 18175, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:35:29.362691', 'step': 18175, 'epoch': 3} {'type': 'loss', 'content': 0.047384120523929596, 'timestamp': '2025-10-01 04:35:29.386829', 'step': 18176, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:35:29.418117', 'step': 18176, 'epoch': 3} {'type': 'loss', 'content': 0.07018577307462692, 'timestamp': '2025-10-01 04:35:29.420374', 'step': 18177, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:35:29.452693', 'step': 18177, 'epoch': 3} {'type': 'loss', 'content': 0.057656437158584595, 'timestamp': '2025-10-01 04:35:29.455999', 'step': 18178, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:29.488704', 'step': 18178, 'epoch': 3} {'type': 'loss', 'content': 0.10507199913263321, 'timestamp': '2025-10-01 04:35:29.490930', 'step': 18179, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:29.523887', 'step': 18179, 'epoch': 3} {'type': 'loss', 'content': 0.05199024826288223, 'timestamp': '2025-10-01 04:35:29.551722', 'step': 18180, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:29.582346', 'step': 18180, 'epoch': 3} {'type': 'loss', 'content': 0.10253860056400299, 'timestamp': '2025-10-01 04:35:29.584338', 'step': 18181, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:29.619588', 'step': 18181, 'epoch': 3} {'type': 'loss', 'content': 0.05667087808251381, 'timestamp': '2025-10-01 04:35:29.622392', 'step': 18182, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:35:29.653032', 'step': 18182, 'epoch': 3} {'type': 'loss', 'content': 0.05502351000905037, 'timestamp': '2025-10-01 04:35:29.655316', 'step': 18183, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:35:29.688633', 'step': 18183, 'epoch': 3} {'type': 'loss', 'content': 0.04931708052754402, 'timestamp': '2025-10-01 04:35:29.713967', 'step': 18184, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:35:29.749833', 'step': 18184, 'epoch': 3} {'type': 'loss', 'content': 0.08410704880952835, 'timestamp': '2025-10-01 04:35:29.752571', 'step': 18185, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:29.783202', 'step': 18185, 'epoch': 3} {'type': 'loss', 'content': 0.10030234605073929, 'timestamp': '2025-10-01 04:35:29.785623', 'step': 18186, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:35:29.817881', 'step': 18186, 'epoch': 3} {'type': 'loss', 'content': 0.10776897519826889, 'timestamp': '2025-10-01 04:35:29.821759', 'step': 18187, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:35:29.852590', 'step': 18187, 'epoch': 3} {'type': 'loss', 'content': 0.06902477890253067, 'timestamp': '2025-10-01 04:35:29.876575', 'step': 18188, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:29.918778', 'step': 18188, 'epoch': 3} {'type': 'loss', 'content': 0.08384348452091217, 'timestamp': '2025-10-01 04:35:29.921000', 'step': 18189, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:35:29.951362', 'step': 18189, 'epoch': 3} {'type': 'loss', 'content': 0.05458569899201393, 'timestamp': '2025-10-01 04:35:29.953771', 'step': 18190, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:29.985293', 'step': 18190, 'epoch': 3} {'type': 'loss', 'content': 0.03588363900780678, 'timestamp': '2025-10-01 04:35:29.988206', 'step': 18191, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:35:30.020238', 'step': 18191, 'epoch': 3} {'type': 'loss', 'content': 0.04997517168521881, 'timestamp': '2025-10-01 04:35:30.044062', 'step': 18192, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:30.074936', 'step': 18192, 'epoch': 3} {'type': 'loss', 'content': 0.0877159982919693, 'timestamp': '2025-10-01 04:35:30.077345', 'step': 18193, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:30.108827', 'step': 18193, 'epoch': 3} {'type': 'loss', 'content': 0.03946239873766899, 'timestamp': '2025-10-01 04:35:30.111082', 'step': 18194, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:30.142473', 'step': 18194, 'epoch': 3} {'type': 'loss', 'content': 0.02645282633602619, 'timestamp': '2025-10-01 04:35:30.145139', 'step': 18195, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:30.175553', 'step': 18195, 'epoch': 3} {'type': 'loss', 'content': 0.07427535206079483, 'timestamp': '2025-10-01 04:35:30.199474', 'step': 18196, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:30.230376', 'step': 18196, 'epoch': 3} {'type': 'loss', 'content': 0.04656911641359329, 'timestamp': '2025-10-01 04:35:30.232639', 'step': 18197, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:30.263618', 'step': 18197, 'epoch': 3} {'type': 'loss', 'content': 0.062220409512519836, 'timestamp': '2025-10-01 04:35:30.267128', 'step': 18198, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:35:30.303134', 'step': 18198, 'epoch': 3} {'type': 'loss', 'content': 0.12421120703220367, 'timestamp': '2025-10-01 04:35:30.305845', 'step': 18199, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:30.343241', 'step': 18199, 'epoch': 3} {'type': 'loss', 'content': 0.05032821744680405, 'timestamp': '2025-10-01 04:35:30.366744', 'step': 18200, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:35:30.398082', 'step': 18200, 'epoch': 3} {'type': 'loss', 'content': 0.03230574354529381, 'timestamp': '2025-10-01 04:35:30.400824', 'step': 18201, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:30.433870', 'step': 18201, 'epoch': 3} {'type': 'loss', 'content': 0.028049495071172714, 'timestamp': '2025-10-01 04:35:30.436160', 'step': 18202, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:30.467288', 'step': 18202, 'epoch': 3} {'type': 'loss', 'content': 0.11627575755119324, 'timestamp': '2025-10-01 04:35:30.469575', 'step': 18203, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:35:30.499582', 'step': 18203, 'epoch': 3} {'type': 'loss', 'content': 0.0908622071146965, 'timestamp': '2025-10-01 04:35:30.525281', 'step': 18204, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:35:30.566148', 'step': 18204, 'epoch': 3} {'type': 'loss', 'content': 0.14700385928153992, 'timestamp': '2025-10-01 04:35:30.568281', 'step': 18205, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:30.600828', 'step': 18205, 'epoch': 3} {'type': 'loss', 'content': 0.053066279739141464, 'timestamp': '2025-10-01 04:35:30.603319', 'step': 18206, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:30.647537', 'step': 18206, 'epoch': 3} {'type': 'loss', 'content': 0.07072725892066956, 'timestamp': '2025-10-01 04:35:30.649816', 'step': 18207, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:30.684799', 'step': 18207, 'epoch': 3} {'type': 'loss', 'content': 0.1126885786652565, 'timestamp': '2025-10-01 04:35:30.708492', 'step': 18208, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:35:30.743716', 'step': 18208, 'epoch': 3} {'type': 'loss', 'content': 0.0649670958518982, 'timestamp': '2025-10-01 04:35:30.745889', 'step': 18209, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:30.796607', 'step': 18209, 'epoch': 3} {'type': 'loss', 'content': 0.10649929940700531, 'timestamp': '2025-10-01 04:35:30.799250', 'step': 18210, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:30.832088', 'step': 18210, 'epoch': 3} {'type': 'loss', 'content': 0.07385486364364624, 'timestamp': '2025-10-01 04:35:30.834274', 'step': 18211, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:30.866974', 'step': 18211, 'epoch': 3} {'type': 'loss', 'content': 0.008687854744493961, 'timestamp': '2025-10-01 04:35:30.891027', 'step': 18212, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:35:30.924200', 'step': 18212, 'epoch': 3} {'type': 'loss', 'content': 0.13580989837646484, 'timestamp': '2025-10-01 04:35:30.926841', 'step': 18213, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:30.959414', 'step': 18213, 'epoch': 3} {'type': 'loss', 'content': 0.1216193288564682, 'timestamp': '2025-10-01 04:35:30.961627', 'step': 18214, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:35:30.994481', 'step': 18214, 'epoch': 3} {'type': 'loss', 'content': 0.04890800639986992, 'timestamp': '2025-10-01 04:35:30.996789', 'step': 18215, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:35:31.032265', 'step': 18215, 'epoch': 3} {'type': 'loss', 'content': 0.0473240427672863, 'timestamp': '2025-10-01 04:35:31.056060', 'step': 18216, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:35:31.088099', 'step': 18216, 'epoch': 3} {'type': 'loss', 'content': 0.08032028377056122, 'timestamp': '2025-10-01 04:35:31.090771', 'step': 18217, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:35:31.123513', 'step': 18217, 'epoch': 3} {'type': 'loss', 'content': 0.046812091022729874, 'timestamp': '2025-10-01 04:35:31.127175', 'step': 18218, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:31.159036', 'step': 18218, 'epoch': 3} {'type': 'loss', 'content': 0.012529642321169376, 'timestamp': '2025-10-01 04:35:31.161677', 'step': 18219, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:31.192703', 'step': 18219, 'epoch': 3} {'type': 'loss', 'content': 0.08260522782802582, 'timestamp': '2025-10-01 04:35:31.216567', 'step': 18220, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:31.251513', 'step': 18220, 'epoch': 3} {'type': 'loss', 'content': 0.040723878890275955, 'timestamp': '2025-10-01 04:35:31.253695', 'step': 18221, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:31.295770', 'step': 18221, 'epoch': 3} {'type': 'loss', 'content': 0.12729845941066742, 'timestamp': '2025-10-01 04:35:31.299457', 'step': 18222, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:31.337441', 'step': 18222, 'epoch': 3} {'type': 'loss', 'content': 0.0592985637485981, 'timestamp': '2025-10-01 04:35:31.339560', 'step': 18223, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:31.372829', 'step': 18223, 'epoch': 3} {'type': 'loss', 'content': 0.053844306617975235, 'timestamp': '2025-10-01 04:35:31.396614', 'step': 18224, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:35:31.428517', 'step': 18224, 'epoch': 3} {'type': 'loss', 'content': 0.04548987001180649, 'timestamp': '2025-10-01 04:35:31.430759', 'step': 18225, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:31.474483', 'step': 18225, 'epoch': 3} {'type': 'loss', 'content': 0.03597297891974449, 'timestamp': '2025-10-01 04:35:31.476744', 'step': 18226, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:31.520803', 'step': 18226, 'epoch': 3} {'type': 'loss', 'content': 0.04598794877529144, 'timestamp': '2025-10-01 04:35:31.523173', 'step': 18227, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:31.554932', 'step': 18227, 'epoch': 3} {'type': 'loss', 'content': 0.0726371482014656, 'timestamp': '2025-10-01 04:35:31.578586', 'step': 18228, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:31.611451', 'step': 18228, 'epoch': 3} {'type': 'loss', 'content': 0.09841577708721161, 'timestamp': '2025-10-01 04:35:31.613841', 'step': 18229, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:31.648314', 'step': 18229, 'epoch': 3} {'type': 'loss', 'content': 0.033825233578681946, 'timestamp': '2025-10-01 04:35:31.651599', 'step': 18230, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:31.685254', 'step': 18230, 'epoch': 3} {'type': 'loss', 'content': 0.1237352266907692, 'timestamp': '2025-10-01 04:35:31.688573', 'step': 18231, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:31.721154', 'step': 18231, 'epoch': 3} {'type': 'loss', 'content': 0.03174465149641037, 'timestamp': '2025-10-01 04:35:31.744862', 'step': 18232, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:31.778737', 'step': 18232, 'epoch': 3} {'type': 'loss', 'content': 0.033323634415864944, 'timestamp': '2025-10-01 04:35:31.780909', 'step': 18233, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:35:31.815509', 'step': 18233, 'epoch': 3} {'type': 'loss', 'content': 0.07631374150514603, 'timestamp': '2025-10-01 04:35:31.818031', 'step': 18234, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:35:31.850603', 'step': 18234, 'epoch': 3} {'type': 'loss', 'content': 0.07316293567419052, 'timestamp': '2025-10-01 04:35:31.853213', 'step': 18235, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:31.885541', 'step': 18235, 'epoch': 3} {'type': 'loss', 'content': 0.05904245749115944, 'timestamp': '2025-10-01 04:35:31.909315', 'step': 18236, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:35:31.942041', 'step': 18236, 'epoch': 3} {'type': 'loss', 'content': 0.039895132184028625, 'timestamp': '2025-10-01 04:35:31.944001', 'step': 18237, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:35:31.978045', 'step': 18237, 'epoch': 3} {'type': 'loss', 'content': 0.04439663514494896, 'timestamp': '2025-10-01 04:35:31.980023', 'step': 18238, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:32.024847', 'step': 18238, 'epoch': 3} {'type': 'loss', 'content': 0.0847914069890976, 'timestamp': '2025-10-01 04:35:32.027114', 'step': 18239, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:32.074177', 'step': 18239, 'epoch': 3} {'type': 'loss', 'content': 0.048642341047525406, 'timestamp': '2025-10-01 04:35:32.098656', 'step': 18240, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:35:32.140528', 'step': 18240, 'epoch': 3} {'type': 'loss', 'content': 0.07551153749227524, 'timestamp': '2025-10-01 04:35:32.142932', 'step': 18241, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:32.179366', 'step': 18241, 'epoch': 3} {'type': 'loss', 'content': 0.03803030401468277, 'timestamp': '2025-10-01 04:35:32.181802', 'step': 18242, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:35:32.215118', 'step': 18242, 'epoch': 3} {'type': 'loss', 'content': 0.011044641025364399, 'timestamp': '2025-10-01 04:35:32.219516', 'step': 18243, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:35:32.253251', 'step': 18243, 'epoch': 3} {'type': 'loss', 'content': 0.031928133219480515, 'timestamp': '2025-10-01 04:35:32.277217', 'step': 18244, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:35:32.311442', 'step': 18244, 'epoch': 3} {'type': 'loss', 'content': 0.13661691546440125, 'timestamp': '2025-10-01 04:35:32.313722', 'step': 18245, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:32.356864', 'step': 18245, 'epoch': 3} {'type': 'loss', 'content': 0.08582852780818939, 'timestamp': '2025-10-01 04:35:32.358989', 'step': 18246, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:32.392699', 'step': 18246, 'epoch': 3} {'type': 'loss', 'content': 0.07290790975093842, 'timestamp': '2025-10-01 04:35:32.395056', 'step': 18247, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-10-01 04:35:32.441335', 'step': 18247, 'epoch': 3} {'type': 'loss', 'content': 0.1180543303489685, 'timestamp': '2025-10-01 04:35:32.473574', 'step': 18248, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:32.510931', 'step': 18248, 'epoch': 3} {'type': 'loss', 'content': 0.09060269594192505, 'timestamp': '2025-10-01 04:35:32.513133', 'step': 18249, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:35:32.545743', 'step': 18249, 'epoch': 3} {'type': 'loss', 'content': 0.041048768907785416, 'timestamp': '2025-10-01 04:35:32.548282', 'step': 18250, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:35:32.582124', 'step': 18250, 'epoch': 3} {'type': 'loss', 'content': 0.023656031116843224, 'timestamp': '2025-10-01 04:35:32.584550', 'step': 18251, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:32.625136', 'step': 18251, 'epoch': 3} {'type': 'loss', 'content': 0.045424286276102066, 'timestamp': '2025-10-01 04:35:32.649310', 'step': 18252, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:35:32.681135', 'step': 18252, 'epoch': 3} {'type': 'loss', 'content': 0.020668134093284607, 'timestamp': '2025-10-01 04:35:32.683512', 'step': 18253, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:32.716238', 'step': 18253, 'epoch': 3} {'type': 'loss', 'content': 0.08060788363218307, 'timestamp': '2025-10-01 04:35:32.718617', 'step': 18254, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:35:32.753127', 'step': 18254, 'epoch': 3} {'type': 'loss', 'content': 0.016391361132264137, 'timestamp': '2025-10-01 04:35:32.763141', 'step': 18255, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:35:32.802284', 'step': 18255, 'epoch': 3} {'type': 'loss', 'content': 0.11716711521148682, 'timestamp': '2025-10-01 04:35:32.826163', 'step': 18256, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:32.858189', 'step': 18256, 'epoch': 3} {'type': 'loss', 'content': 0.05319104343652725, 'timestamp': '2025-10-01 04:35:32.860332', 'step': 18257, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:32.898932', 'step': 18257, 'epoch': 3} {'type': 'loss', 'content': 0.0658469870686531, 'timestamp': '2025-10-01 04:35:32.901138', 'step': 18258, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:32.933326', 'step': 18258, 'epoch': 3} {'type': 'loss', 'content': 0.11080078035593033, 'timestamp': '2025-10-01 04:35:32.935739', 'step': 18259, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 12814563338304}, 'timestamp': '2025-10-01 04:35:32.976657', 'step': 18259, 'epoch': 3} {'type': 'loss', 'content': 0.033894315361976624, 'timestamp': '2025-10-01 04:35:33.013841', 'step': 18260, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:33.047698', 'step': 18260, 'epoch': 3} {'type': 'loss', 'content': 0.06554458290338516, 'timestamp': '2025-10-01 04:35:33.050969', 'step': 18261, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:35:33.086582', 'step': 18261, 'epoch': 3} {'type': 'loss', 'content': 0.13046132028102875, 'timestamp': '2025-10-01 04:35:33.089172', 'step': 18262, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:33.129680', 'step': 18262, 'epoch': 3} {'type': 'loss', 'content': 0.0953269824385643, 'timestamp': '2025-10-01 04:35:33.131777', 'step': 18263, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:33.163929', 'step': 18263, 'epoch': 3} {'type': 'loss', 'content': 0.1292407214641571, 'timestamp': '2025-10-01 04:35:33.187718', 'step': 18264, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:35:33.221369', 'step': 18264, 'epoch': 3} {'type': 'loss', 'content': 0.04315745830535889, 'timestamp': '2025-10-01 04:35:33.223761', 'step': 18265, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:33.255887', 'step': 18265, 'epoch': 3} {'type': 'loss', 'content': 0.09064147621393204, 'timestamp': '2025-10-01 04:35:33.258338', 'step': 18266, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:33.291037', 'step': 18266, 'epoch': 3} {'type': 'loss', 'content': 0.060811541974544525, 'timestamp': '2025-10-01 04:35:33.295304', 'step': 18267, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:33.328501', 'step': 18267, 'epoch': 3} {'type': 'loss', 'content': 0.057861220091581345, 'timestamp': '2025-10-01 04:35:33.353661', 'step': 18268, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:33.386026', 'step': 18268, 'epoch': 3} {'type': 'loss', 'content': 0.10370127856731415, 'timestamp': '2025-10-01 04:35:33.388304', 'step': 18269, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:35:33.421935', 'step': 18269, 'epoch': 3} {'type': 'loss', 'content': 0.03390045091509819, 'timestamp': '2025-10-01 04:35:33.433431', 'step': 18270, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:33.482152', 'step': 18270, 'epoch': 3} {'type': 'loss', 'content': 0.046382855623960495, 'timestamp': '2025-10-01 04:35:33.484527', 'step': 18271, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:33.524603', 'step': 18271, 'epoch': 3} {'type': 'loss', 'content': 0.05494583398103714, 'timestamp': '2025-10-01 04:35:33.548214', 'step': 18272, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:35:33.580364', 'step': 18272, 'epoch': 3} {'type': 'loss', 'content': 0.04782315716147423, 'timestamp': '2025-10-01 04:35:33.582693', 'step': 18273, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:33.614738', 'step': 18273, 'epoch': 3} {'type': 'loss', 'content': 0.05152697116136551, 'timestamp': '2025-10-01 04:35:33.626558', 'step': 18274, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:35:33.672467', 'step': 18274, 'epoch': 3} {'type': 'loss', 'content': 0.12253634631633759, 'timestamp': '2025-10-01 04:35:33.686420', 'step': 18275, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:35:33.719616', 'step': 18275, 'epoch': 3} {'type': 'loss', 'content': 0.0897708609700203, 'timestamp': '2025-10-01 04:35:33.743567', 'step': 18276, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:33.775011', 'step': 18276, 'epoch': 3} {'type': 'loss', 'content': 0.059508323669433594, 'timestamp': '2025-10-01 04:35:33.789070', 'step': 18277, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:35:33.821556', 'step': 18277, 'epoch': 3} {'type': 'loss', 'content': 0.038633570075035095, 'timestamp': '2025-10-01 04:35:33.824062', 'step': 18278, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:35:33.857172', 'step': 18278, 'epoch': 3} {'type': 'loss', 'content': 0.07487723976373672, 'timestamp': '2025-10-01 04:35:33.859683', 'step': 18279, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:33.891967', 'step': 18279, 'epoch': 3} {'type': 'loss', 'content': 0.1156376376748085, 'timestamp': '2025-10-01 04:35:33.915573', 'step': 18280, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:33.948468', 'step': 18280, 'epoch': 3} {'type': 'loss', 'content': 0.11713496595621109, 'timestamp': '2025-10-01 04:35:33.951359', 'step': 18281, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:33.983451', 'step': 18281, 'epoch': 3} {'type': 'loss', 'content': 0.06329325586557388, 'timestamp': '2025-10-01 04:35:33.986002', 'step': 18282, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:35:34.018410', 'step': 18282, 'epoch': 3} {'type': 'loss', 'content': 0.007677969988435507, 'timestamp': '2025-10-01 04:35:34.020984', 'step': 18283, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:34.053692', 'step': 18283, 'epoch': 3} {'type': 'loss', 'content': 0.11315280199050903, 'timestamp': '2025-10-01 04:35:34.086304', 'step': 18284, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:34.130446', 'step': 18284, 'epoch': 3} {'type': 'loss', 'content': 0.11310235410928726, 'timestamp': '2025-10-01 04:35:34.132614', 'step': 18285, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:35:34.168431', 'step': 18285, 'epoch': 3} {'type': 'loss', 'content': 0.030993858352303505, 'timestamp': '2025-10-01 04:35:34.171522', 'step': 18286, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:34.206684', 'step': 18286, 'epoch': 3} {'type': 'loss', 'content': 0.09102215617895126, 'timestamp': '2025-10-01 04:35:34.218080', 'step': 18287, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:35:34.250604', 'step': 18287, 'epoch': 3} {'type': 'loss', 'content': 0.044421032071113586, 'timestamp': '2025-10-01 04:35:34.275898', 'step': 18288, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:34.309015', 'step': 18288, 'epoch': 3} {'type': 'loss', 'content': 0.07424620538949966, 'timestamp': '2025-10-01 04:35:34.311372', 'step': 18289, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:35:34.343735', 'step': 18289, 'epoch': 3} {'type': 'loss', 'content': 0.06072337552905083, 'timestamp': '2025-10-01 04:35:34.346270', 'step': 18290, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:34.379079', 'step': 18290, 'epoch': 3} {'type': 'loss', 'content': 0.1059432253241539, 'timestamp': '2025-10-01 04:35:34.386805', 'step': 18291, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:34.418746', 'step': 18291, 'epoch': 3} {'type': 'loss', 'content': 0.08353010565042496, 'timestamp': '2025-10-01 04:35:34.442815', 'step': 18292, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:34.480078', 'step': 18292, 'epoch': 3} {'type': 'loss', 'content': 0.06897012889385223, 'timestamp': '2025-10-01 04:35:34.482308', 'step': 18293, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:35:34.522899', 'step': 18293, 'epoch': 3} {'type': 'loss', 'content': 0.09252270311117172, 'timestamp': '2025-10-01 04:35:34.525827', 'step': 18294, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:34.561029', 'step': 18294, 'epoch': 3} {'type': 'loss', 'content': 0.014469396322965622, 'timestamp': '2025-10-01 04:35:34.563477', 'step': 18295, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:34.605507', 'step': 18295, 'epoch': 3} {'type': 'loss', 'content': 0.17192097008228302, 'timestamp': '2025-10-01 04:35:34.629677', 'step': 18296, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:35:34.666519', 'step': 18296, 'epoch': 3} {'type': 'loss', 'content': 0.04038281738758087, 'timestamp': '2025-10-01 04:35:34.668636', 'step': 18297, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:34.714308', 'step': 18297, 'epoch': 3} {'type': 'loss', 'content': 0.059869349002838135, 'timestamp': '2025-10-01 04:35:34.716540', 'step': 18298, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:34.754968', 'step': 18298, 'epoch': 3} {'type': 'loss', 'content': 0.06324996054172516, 'timestamp': '2025-10-01 04:35:34.771277', 'step': 18299, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:35:34.808090', 'step': 18299, 'epoch': 3} {'type': 'loss', 'content': 0.08405616134405136, 'timestamp': '2025-10-01 04:35:34.832756', 'step': 18300, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:35:34.865900', 'step': 18300, 'epoch': 3} {'type': 'loss', 'content': 0.08741991221904755, 'timestamp': '2025-10-01 04:35:34.867994', 'step': 18301, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:34.900355', 'step': 18301, 'epoch': 3} {'type': 'loss', 'content': 0.1031753346323967, 'timestamp': '2025-10-01 04:35:34.903174', 'step': 18302, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:34.935916', 'step': 18302, 'epoch': 3} {'type': 'loss', 'content': 0.04942679405212402, 'timestamp': '2025-10-01 04:35:34.938450', 'step': 18303, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:34.969111', 'step': 18303, 'epoch': 3} {'type': 'loss', 'content': 0.026592880487442017, 'timestamp': '2025-10-01 04:35:34.992933', 'step': 18304, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:35.025159', 'step': 18304, 'epoch': 3} {'type': 'loss', 'content': 0.05770931765437126, 'timestamp': '2025-10-01 04:35:35.028456', 'step': 18305, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:35.059144', 'step': 18305, 'epoch': 3} {'type': 'loss', 'content': 0.09911655634641647, 'timestamp': '2025-10-01 04:35:35.061509', 'step': 18306, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:35.092174', 'step': 18306, 'epoch': 3} {'type': 'loss', 'content': 0.06866961717605591, 'timestamp': '2025-10-01 04:35:35.094395', 'step': 18307, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:35.125800', 'step': 18307, 'epoch': 3} {'type': 'loss', 'content': 0.03523784503340721, 'timestamp': '2025-10-01 04:35:35.149461', 'step': 18308, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:35.182277', 'step': 18308, 'epoch': 3} {'type': 'loss', 'content': 0.07885236293077469, 'timestamp': '2025-10-01 04:35:35.184561', 'step': 18309, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:35.218701', 'step': 18309, 'epoch': 3} {'type': 'loss', 'content': 0.061708949506282806, 'timestamp': '2025-10-01 04:35:35.220731', 'step': 18310, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:35.256399', 'step': 18310, 'epoch': 3} {'type': 'loss', 'content': 0.10064934194087982, 'timestamp': '2025-10-01 04:35:35.258584', 'step': 18311, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:35:35.290595', 'step': 18311, 'epoch': 3} {'type': 'loss', 'content': 0.11471161246299744, 'timestamp': '2025-10-01 04:35:35.314346', 'step': 18312, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:35.346678', 'step': 18312, 'epoch': 3} {'type': 'loss', 'content': 0.05434787645936012, 'timestamp': '2025-10-01 04:35:35.349483', 'step': 18313, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:35.386567', 'step': 18313, 'epoch': 3} {'type': 'loss', 'content': 0.05004386603832245, 'timestamp': '2025-10-01 04:35:35.388923', 'step': 18314, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:35.429053', 'step': 18314, 'epoch': 3} {'type': 'loss', 'content': 0.051602184772491455, 'timestamp': '2025-10-01 04:35:35.431516', 'step': 18315, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:35.465053', 'step': 18315, 'epoch': 3} {'type': 'loss', 'content': 0.102230504155159, 'timestamp': '2025-10-01 04:35:35.494910', 'step': 18316, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:35.531691', 'step': 18316, 'epoch': 3} {'type': 'loss', 'content': 0.10571812838315964, 'timestamp': '2025-10-01 04:35:35.534506', 'step': 18317, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:35.568082', 'step': 18317, 'epoch': 3} {'type': 'loss', 'content': 0.07982496172189713, 'timestamp': '2025-10-01 04:35:35.570446', 'step': 18318, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:35.602462', 'step': 18318, 'epoch': 3} {'type': 'loss', 'content': 0.1263265609741211, 'timestamp': '2025-10-01 04:35:35.604526', 'step': 18319, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:35.637561', 'step': 18319, 'epoch': 3} {'type': 'loss', 'content': 0.028796503320336342, 'timestamp': '2025-10-01 04:35:35.661625', 'step': 18320, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:35.699805', 'step': 18320, 'epoch': 3} {'type': 'loss', 'content': 0.08391211181879044, 'timestamp': '2025-10-01 04:35:35.702342', 'step': 18321, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:35:35.735798', 'step': 18321, 'epoch': 3} {'type': 'loss', 'content': 0.10461471229791641, 'timestamp': '2025-10-01 04:35:35.739110', 'step': 18322, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:35:35.772582', 'step': 18322, 'epoch': 3} {'type': 'loss', 'content': 0.09125060588121414, 'timestamp': '2025-10-01 04:35:35.775807', 'step': 18323, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:35:35.809081', 'step': 18323, 'epoch': 3} {'type': 'loss', 'content': 0.07495860010385513, 'timestamp': '2025-10-01 04:35:35.833104', 'step': 18324, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:35.870553', 'step': 18324, 'epoch': 3} {'type': 'loss', 'content': 0.07039298862218857, 'timestamp': '2025-10-01 04:35:35.872758', 'step': 18325, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:35.905747', 'step': 18325, 'epoch': 3} {'type': 'loss', 'content': 0.06234685331583023, 'timestamp': '2025-10-01 04:35:35.907934', 'step': 18326, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:35:35.966957', 'step': 18326, 'epoch': 3} {'type': 'loss', 'content': 0.07618921250104904, 'timestamp': '2025-10-01 04:35:35.969810', 'step': 18327, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:35:36.001420', 'step': 18327, 'epoch': 3} {'type': 'loss', 'content': 0.06904396414756775, 'timestamp': '2025-10-01 04:35:36.025029', 'step': 18328, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:36.057377', 'step': 18328, 'epoch': 3} {'type': 'loss', 'content': 0.020216066390275955, 'timestamp': '2025-10-01 04:35:36.059534', 'step': 18329, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:35:36.095653', 'step': 18329, 'epoch': 3} {'type': 'loss', 'content': 0.03503495082259178, 'timestamp': '2025-10-01 04:35:36.098008', 'step': 18330, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:35:36.133914', 'step': 18330, 'epoch': 3} {'type': 'loss', 'content': 0.032136108726263046, 'timestamp': '2025-10-01 04:35:36.136221', 'step': 18331, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:35:36.169169', 'step': 18331, 'epoch': 3} {'type': 'loss', 'content': 0.10231135785579681, 'timestamp': '2025-10-01 04:35:36.193035', 'step': 18332, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:36.224280', 'step': 18332, 'epoch': 3} {'type': 'loss', 'content': 0.08921989798545837, 'timestamp': '2025-10-01 04:35:36.226425', 'step': 18333, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:36.262806', 'step': 18333, 'epoch': 3} {'type': 'loss', 'content': 0.0333503857254982, 'timestamp': '2025-10-01 04:35:36.265894', 'step': 18334, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:36.303228', 'step': 18334, 'epoch': 3} {'type': 'loss', 'content': 0.06003279611468315, 'timestamp': '2025-10-01 04:35:36.305359', 'step': 18335, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:36.336148', 'step': 18335, 'epoch': 3} {'type': 'loss', 'content': 0.07937747985124588, 'timestamp': '2025-10-01 04:35:36.359760', 'step': 18336, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:36.391738', 'step': 18336, 'epoch': 3} {'type': 'loss', 'content': 0.015454455278813839, 'timestamp': '2025-10-01 04:35:36.394265', 'step': 18337, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:36.430225', 'step': 18337, 'epoch': 3} {'type': 'loss', 'content': 0.049407463520765305, 'timestamp': '2025-10-01 04:35:36.432325', 'step': 18338, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:36.466367', 'step': 18338, 'epoch': 3} {'type': 'loss', 'content': 0.041585732251405716, 'timestamp': '2025-10-01 04:35:36.468607', 'step': 18339, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:36.500989', 'step': 18339, 'epoch': 3} {'type': 'loss', 'content': 0.024531209841370583, 'timestamp': '2025-10-01 04:35:36.524613', 'step': 18340, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:36.561601', 'step': 18340, 'epoch': 3} {'type': 'loss', 'content': 0.06456758826971054, 'timestamp': '2025-10-01 04:35:36.563749', 'step': 18341, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:36.594973', 'step': 18341, 'epoch': 3} {'type': 'loss', 'content': 0.052874647080898285, 'timestamp': '2025-10-01 04:35:36.597024', 'step': 18342, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:35:36.628884', 'step': 18342, 'epoch': 3} {'type': 'loss', 'content': 0.08519290387630463, 'timestamp': '2025-10-01 04:35:36.631028', 'step': 18343, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:36.662848', 'step': 18343, 'epoch': 3} {'type': 'loss', 'content': 0.05353935435414314, 'timestamp': '2025-10-01 04:35:36.686541', 'step': 18344, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:36.719036', 'step': 18344, 'epoch': 3} {'type': 'loss', 'content': 0.037807781249284744, 'timestamp': '2025-10-01 04:35:36.721336', 'step': 18345, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:36.753230', 'step': 18345, 'epoch': 3} {'type': 'loss', 'content': 0.04537027329206467, 'timestamp': '2025-10-01 04:35:36.769034', 'step': 18346, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:35:36.811089', 'step': 18346, 'epoch': 3} {'type': 'loss', 'content': 0.12042021751403809, 'timestamp': '2025-10-01 04:35:36.821033', 'step': 18347, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:36.854270', 'step': 18347, 'epoch': 3} {'type': 'loss', 'content': 0.1072762981057167, 'timestamp': '2025-10-01 04:35:36.877952', 'step': 18348, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:35:36.911299', 'step': 18348, 'epoch': 3} {'type': 'loss', 'content': 0.046620000153779984, 'timestamp': '2025-10-01 04:35:36.913478', 'step': 18349, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:35:36.945838', 'step': 18349, 'epoch': 3} {'type': 'loss', 'content': 0.1365247368812561, 'timestamp': '2025-10-01 04:35:36.948238', 'step': 18350, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:36.981797', 'step': 18350, 'epoch': 3} {'type': 'loss', 'content': 0.07842370122671127, 'timestamp': '2025-10-01 04:35:36.983982', 'step': 18351, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:37.017000', 'step': 18351, 'epoch': 3} {'type': 'loss', 'content': 0.07184962928295135, 'timestamp': '2025-10-01 04:35:37.040573', 'step': 18352, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:37.074521', 'step': 18352, 'epoch': 3} {'type': 'loss', 'content': 0.03875620290637016, 'timestamp': '2025-10-01 04:35:37.076773', 'step': 18353, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:37.110779', 'step': 18353, 'epoch': 3} {'type': 'loss', 'content': 0.061440952122211456, 'timestamp': '2025-10-01 04:35:37.112974', 'step': 18354, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:37.145643', 'step': 18354, 'epoch': 3} {'type': 'loss', 'content': 0.04918750002980232, 'timestamp': '2025-10-01 04:35:37.148192', 'step': 18355, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:37.189992', 'step': 18355, 'epoch': 3} {'type': 'loss', 'content': 0.1558278352022171, 'timestamp': '2025-10-01 04:35:37.214057', 'step': 18356, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:35:37.252982', 'step': 18356, 'epoch': 3} {'type': 'loss', 'content': 0.024757390841841698, 'timestamp': '2025-10-01 04:35:37.255577', 'step': 18357, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:37.300404', 'step': 18357, 'epoch': 3} {'type': 'loss', 'content': 0.15209032595157623, 'timestamp': '2025-10-01 04:35:37.302674', 'step': 18358, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:35:37.350566', 'step': 18358, 'epoch': 3} {'type': 'loss', 'content': 0.04412614926695824, 'timestamp': '2025-10-01 04:35:37.353052', 'step': 18359, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:37.384851', 'step': 18359, 'epoch': 3} {'type': 'loss', 'content': 0.05917644128203392, 'timestamp': '2025-10-01 04:35:37.408740', 'step': 18360, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:35:37.444279', 'step': 18360, 'epoch': 3} {'type': 'loss', 'content': 0.059673015028238297, 'timestamp': '2025-10-01 04:35:37.446606', 'step': 18361, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:37.478867', 'step': 18361, 'epoch': 3} {'type': 'loss', 'content': 0.12169314920902252, 'timestamp': '2025-10-01 04:35:37.480939', 'step': 18362, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:37.513593', 'step': 18362, 'epoch': 3} {'type': 'loss', 'content': 0.0755191221833229, 'timestamp': '2025-10-01 04:35:37.516511', 'step': 18363, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:35:37.546783', 'step': 18363, 'epoch': 3} {'type': 'loss', 'content': 0.03662774711847305, 'timestamp': '2025-10-01 04:35:37.574591', 'step': 18364, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:35:37.606553', 'step': 18364, 'epoch': 3} {'type': 'loss', 'content': 0.08056177198886871, 'timestamp': '2025-10-01 04:35:37.608647', 'step': 18365, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:35:37.643350', 'step': 18365, 'epoch': 3} {'type': 'loss', 'content': 0.05373441427946091, 'timestamp': '2025-10-01 04:35:37.645640', 'step': 18366, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:37.677089', 'step': 18366, 'epoch': 3} {'type': 'loss', 'content': 0.022859279066324234, 'timestamp': '2025-10-01 04:35:37.679375', 'step': 18367, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:37.712563', 'step': 18367, 'epoch': 3} {'type': 'loss', 'content': 0.07628734409809113, 'timestamp': '2025-10-01 04:35:37.736748', 'step': 18368, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:35:37.770698', 'step': 18368, 'epoch': 3} {'type': 'loss', 'content': 0.07115760445594788, 'timestamp': '2025-10-01 04:35:37.783079', 'step': 18369, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:35:37.816268', 'step': 18369, 'epoch': 3} {'type': 'loss', 'content': 0.03207849711179733, 'timestamp': '2025-10-01 04:35:37.818976', 'step': 18370, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:37.849703', 'step': 18370, 'epoch': 3} {'type': 'loss', 'content': 0.080869659781456, 'timestamp': '2025-10-01 04:35:37.851907', 'step': 18371, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:35:37.884106', 'step': 18371, 'epoch': 3} {'type': 'loss', 'content': 0.12010914832353592, 'timestamp': '2025-10-01 04:35:37.908141', 'step': 18372, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:35:37.938979', 'step': 18372, 'epoch': 3} {'type': 'loss', 'content': 0.04663870483636856, 'timestamp': '2025-10-01 04:35:37.947168', 'step': 18373, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:35:37.977870', 'step': 18373, 'epoch': 3} {'type': 'loss', 'content': 0.07122759521007538, 'timestamp': '2025-10-01 04:35:37.980392', 'step': 18374, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:38.013047', 'step': 18374, 'epoch': 3} {'type': 'loss', 'content': 0.04004719480872154, 'timestamp': '2025-10-01 04:35:38.023402', 'step': 18375, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:38.054409', 'step': 18375, 'epoch': 3} {'type': 'loss', 'content': 0.09679179638624191, 'timestamp': '2025-10-01 04:35:38.078628', 'step': 18376, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:38.111089', 'step': 18376, 'epoch': 3} {'type': 'loss', 'content': 0.015797903761267662, 'timestamp': '2025-10-01 04:35:38.113327', 'step': 18377, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:38.145687', 'step': 18377, 'epoch': 3} {'type': 'loss', 'content': 0.045473676174879074, 'timestamp': '2025-10-01 04:35:38.148243', 'step': 18378, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:38.180144', 'step': 18378, 'epoch': 3} {'type': 'loss', 'content': 0.07526744157075882, 'timestamp': '2025-10-01 04:35:38.184074', 'step': 18379, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:35:38.216143', 'step': 18379, 'epoch': 3} {'type': 'loss', 'content': 0.07375594228506088, 'timestamp': '2025-10-01 04:35:38.240651', 'step': 18380, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:38.271957', 'step': 18380, 'epoch': 3} {'type': 'loss', 'content': 0.044997766613960266, 'timestamp': '2025-10-01 04:35:38.274290', 'step': 18381, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:38.308301', 'step': 18381, 'epoch': 3} {'type': 'loss', 'content': 0.019339188933372498, 'timestamp': '2025-10-01 04:35:38.310915', 'step': 18382, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:38.344427', 'step': 18382, 'epoch': 3} {'type': 'loss', 'content': 0.09168511629104614, 'timestamp': '2025-10-01 04:35:38.347224', 'step': 18383, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:38.381924', 'step': 18383, 'epoch': 3} {'type': 'loss', 'content': 0.08805537968873978, 'timestamp': '2025-10-01 04:35:38.405936', 'step': 18384, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:38.439019', 'step': 18384, 'epoch': 3} {'type': 'loss', 'content': 0.07570625841617584, 'timestamp': '2025-10-01 04:35:38.441401', 'step': 18385, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:38.473320', 'step': 18385, 'epoch': 3} {'type': 'loss', 'content': 0.031664349138736725, 'timestamp': '2025-10-01 04:35:38.476065', 'step': 18386, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:38.507004', 'step': 18386, 'epoch': 3} {'type': 'loss', 'content': 0.10964427143335342, 'timestamp': '2025-10-01 04:35:38.509575', 'step': 18387, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:38.541101', 'step': 18387, 'epoch': 3} {'type': 'loss', 'content': 0.03611559048295021, 'timestamp': '2025-10-01 04:35:38.564853', 'step': 18388, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:38.595226', 'step': 18388, 'epoch': 3} {'type': 'loss', 'content': 0.057344164699316025, 'timestamp': '2025-10-01 04:35:38.601322', 'step': 18389, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:38.634321', 'step': 18389, 'epoch': 3} {'type': 'loss', 'content': 0.07306665927171707, 'timestamp': '2025-10-01 04:35:38.636828', 'step': 18390, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:35:38.668130', 'step': 18390, 'epoch': 3} {'type': 'loss', 'content': 0.1081477701663971, 'timestamp': '2025-10-01 04:35:38.670525', 'step': 18391, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:38.703033', 'step': 18391, 'epoch': 3} {'type': 'loss', 'content': 0.07185826450586319, 'timestamp': '2025-10-01 04:35:38.726983', 'step': 18392, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:35:38.757455', 'step': 18392, 'epoch': 3} {'type': 'loss', 'content': 0.05477626994252205, 'timestamp': '2025-10-01 04:35:38.760027', 'step': 18393, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:35:38.790117', 'step': 18393, 'epoch': 3} {'type': 'loss', 'content': 0.04632826894521713, 'timestamp': '2025-10-01 04:35:38.794260', 'step': 18394, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:35:38.827030', 'step': 18394, 'epoch': 3} {'type': 'loss', 'content': 0.10345964878797531, 'timestamp': '2025-10-01 04:35:38.829500', 'step': 18395, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:38.864750', 'step': 18395, 'epoch': 3} {'type': 'loss', 'content': 0.1606195867061615, 'timestamp': '2025-10-01 04:35:38.888511', 'step': 18396, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:38.919847', 'step': 18396, 'epoch': 3} {'type': 'loss', 'content': 0.03477516397833824, 'timestamp': '2025-10-01 04:35:38.922356', 'step': 18397, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:38.953014', 'step': 18397, 'epoch': 3} {'type': 'loss', 'content': 0.09789076447486877, 'timestamp': '2025-10-01 04:35:38.955463', 'step': 18398, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:38.985827', 'step': 18398, 'epoch': 3} {'type': 'loss', 'content': 0.06918634474277496, 'timestamp': '2025-10-01 04:35:38.988359', 'step': 18399, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:39.018870', 'step': 18399, 'epoch': 3} {'type': 'loss', 'content': 0.06467477232217789, 'timestamp': '2025-10-01 04:35:39.047945', 'step': 18400, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:35:39.079751', 'step': 18400, 'epoch': 3} {'type': 'loss', 'content': 0.1054486557841301, 'timestamp': '2025-10-01 04:35:39.082020', 'step': 18401, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:35:39.112805', 'step': 18401, 'epoch': 3} {'type': 'loss', 'content': 0.14384107291698456, 'timestamp': '2025-10-01 04:35:39.115466', 'step': 18402, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:35:39.148822', 'step': 18402, 'epoch': 3} {'type': 'loss', 'content': 0.07826689630746841, 'timestamp': '2025-10-01 04:35:39.151237', 'step': 18403, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:39.183486', 'step': 18403, 'epoch': 3} {'type': 'loss', 'content': 0.06516257673501968, 'timestamp': '2025-10-01 04:35:39.207708', 'step': 18404, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:39.238734', 'step': 18404, 'epoch': 3} {'type': 'loss', 'content': 0.08161091804504395, 'timestamp': '2025-10-01 04:35:39.250185', 'step': 18405, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:39.281680', 'step': 18405, 'epoch': 3} {'type': 'loss', 'content': 0.055246852338314056, 'timestamp': '2025-10-01 04:35:39.283897', 'step': 18406, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:39.315250', 'step': 18406, 'epoch': 3} {'type': 'loss', 'content': 0.022668590769171715, 'timestamp': '2025-10-01 04:35:39.317533', 'step': 18407, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:35:39.348235', 'step': 18407, 'epoch': 3} {'type': 'loss', 'content': 0.04557271674275398, 'timestamp': '2025-10-01 04:35:39.372019', 'step': 18408, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:39.402443', 'step': 18408, 'epoch': 3} {'type': 'loss', 'content': 0.07769965380430222, 'timestamp': '2025-10-01 04:35:39.405053', 'step': 18409, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:35:39.438688', 'step': 18409, 'epoch': 3} {'type': 'loss', 'content': 0.013574481941759586, 'timestamp': '2025-10-01 04:35:39.454505', 'step': 18410, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:39.488061', 'step': 18410, 'epoch': 3} {'type': 'loss', 'content': 0.05408009514212608, 'timestamp': '2025-10-01 04:35:39.490274', 'step': 18411, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:39.522888', 'step': 18411, 'epoch': 3} {'type': 'loss', 'content': 0.13428837060928345, 'timestamp': '2025-10-01 04:35:39.547035', 'step': 18412, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:39.594006', 'step': 18412, 'epoch': 3} {'type': 'loss', 'content': 0.018268277868628502, 'timestamp': '2025-10-01 04:35:39.596134', 'step': 18413, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:39.626638', 'step': 18413, 'epoch': 3} {'type': 'loss', 'content': 0.08071388304233551, 'timestamp': '2025-10-01 04:35:39.628758', 'step': 18414, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:35:39.659718', 'step': 18414, 'epoch': 3} {'type': 'loss', 'content': 0.042758431285619736, 'timestamp': '2025-10-01 04:35:39.662586', 'step': 18415, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:35:39.692945', 'step': 18415, 'epoch': 3} {'type': 'loss', 'content': 0.10685094445943832, 'timestamp': '2025-10-01 04:35:39.717152', 'step': 18416, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-10-01 04:35:39.748064', 'step': 18416, 'epoch': 3} {'type': 'loss', 'content': 0.027963537722826004, 'timestamp': '2025-10-01 04:35:39.750328', 'step': 18417, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:39.783040', 'step': 18417, 'epoch': 3} {'type': 'loss', 'content': 0.09828688204288483, 'timestamp': '2025-10-01 04:35:39.786273', 'step': 18418, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:39.818692', 'step': 18418, 'epoch': 3} {'type': 'loss', 'content': 0.02903803065419197, 'timestamp': '2025-10-01 04:35:39.821910', 'step': 18419, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:39.858552', 'step': 18419, 'epoch': 3} {'type': 'loss', 'content': 0.058700162917375565, 'timestamp': '2025-10-01 04:35:39.882758', 'step': 18420, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:39.913289', 'step': 18420, 'epoch': 3} {'type': 'loss', 'content': 0.07300008088350296, 'timestamp': '2025-10-01 04:35:39.915406', 'step': 18421, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:39.946087', 'step': 18421, 'epoch': 3} {'type': 'loss', 'content': 0.07322684675455093, 'timestamp': '2025-10-01 04:35:39.950863', 'step': 18422, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:35:39.982209', 'step': 18422, 'epoch': 3} {'type': 'loss', 'content': 0.06054225191473961, 'timestamp': '2025-10-01 04:35:39.984506', 'step': 18423, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:35:40.016022', 'step': 18423, 'epoch': 3} {'type': 'loss', 'content': 0.06680160760879517, 'timestamp': '2025-10-01 04:35:40.040019', 'step': 18424, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:40.070994', 'step': 18424, 'epoch': 3} {'type': 'loss', 'content': 0.06122394651174545, 'timestamp': '2025-10-01 04:35:40.073094', 'step': 18425, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:35:40.104367', 'step': 18425, 'epoch': 3} {'type': 'loss', 'content': 0.0506526380777359, 'timestamp': '2025-10-01 04:35:40.106917', 'step': 18426, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:40.138048', 'step': 18426, 'epoch': 3} {'type': 'loss', 'content': 0.02887386828660965, 'timestamp': '2025-10-01 04:35:40.140206', 'step': 18427, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:40.172365', 'step': 18427, 'epoch': 3} {'type': 'loss', 'content': 0.09508015215396881, 'timestamp': '2025-10-01 04:35:40.196089', 'step': 18428, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:40.227177', 'step': 18428, 'epoch': 3} {'type': 'loss', 'content': 0.08936842530965805, 'timestamp': '2025-10-01 04:35:40.229642', 'step': 18429, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:40.261101', 'step': 18429, 'epoch': 3} {'type': 'loss', 'content': 0.09371701627969742, 'timestamp': '2025-10-01 04:35:40.263294', 'step': 18430, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:40.295127', 'step': 18430, 'epoch': 3} {'type': 'loss', 'content': 0.07498597353696823, 'timestamp': '2025-10-01 04:35:40.297326', 'step': 18431, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:35:40.329392', 'step': 18431, 'epoch': 3} {'type': 'loss', 'content': 0.08729348331689835, 'timestamp': '2025-10-01 04:35:40.353429', 'step': 18432, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:40.384917', 'step': 18432, 'epoch': 3} {'type': 'loss', 'content': 0.04418893903493881, 'timestamp': '2025-10-01 04:35:40.387270', 'step': 18433, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:35:40.419837', 'step': 18433, 'epoch': 3} {'type': 'loss', 'content': 0.09619726985692978, 'timestamp': '2025-10-01 04:35:40.423789', 'step': 18434, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:40.455858', 'step': 18434, 'epoch': 3} {'type': 'loss', 'content': 0.08019939064979553, 'timestamp': '2025-10-01 04:35:40.457894', 'step': 18435, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:40.488979', 'step': 18435, 'epoch': 3} {'type': 'loss', 'content': 0.048269838094711304, 'timestamp': '2025-10-01 04:35:40.512792', 'step': 18436, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:40.544647', 'step': 18436, 'epoch': 3} {'type': 'loss', 'content': 0.1050855815410614, 'timestamp': '2025-10-01 04:35:40.546770', 'step': 18437, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:35:40.577602', 'step': 18437, 'epoch': 3} {'type': 'loss', 'content': 0.08756354451179504, 'timestamp': '2025-10-01 04:35:40.580406', 'step': 18438, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:35:40.611476', 'step': 18438, 'epoch': 3} {'type': 'loss', 'content': 0.06871737539768219, 'timestamp': '2025-10-01 04:35:40.613899', 'step': 18439, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:40.647081', 'step': 18439, 'epoch': 3} {'type': 'loss', 'content': 0.11358945816755295, 'timestamp': '2025-10-01 04:35:40.670836', 'step': 18440, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:35:40.702084', 'step': 18440, 'epoch': 3} {'type': 'loss', 'content': 0.058917563408613205, 'timestamp': '2025-10-01 04:35:40.704306', 'step': 18441, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:40.735572', 'step': 18441, 'epoch': 3} {'type': 'loss', 'content': 0.06488378345966339, 'timestamp': '2025-10-01 04:35:40.737752', 'step': 18442, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:35:40.769066', 'step': 18442, 'epoch': 3} {'type': 'loss', 'content': 0.11160529404878616, 'timestamp': '2025-10-01 04:35:40.772704', 'step': 18443, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:40.803716', 'step': 18443, 'epoch': 3} {'type': 'loss', 'content': 0.03373027965426445, 'timestamp': '2025-10-01 04:35:40.827476', 'step': 18444, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-10-01 04:35:40.869104', 'step': 18444, 'epoch': 3} {'type': 'loss', 'content': 0.05015919357538223, 'timestamp': '2025-10-01 04:35:40.871387', 'step': 18445, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:40.901930', 'step': 18445, 'epoch': 3} {'type': 'loss', 'content': 0.03572218120098114, 'timestamp': '2025-10-01 04:35:40.904205', 'step': 18446, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:40.934458', 'step': 18446, 'epoch': 3} {'type': 'loss', 'content': 0.08047226071357727, 'timestamp': '2025-10-01 04:35:40.937292', 'step': 18447, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:35:40.967882', 'step': 18447, 'epoch': 3} {'type': 'loss', 'content': 0.034450728446245193, 'timestamp': '2025-10-01 04:35:40.991782', 'step': 18448, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:41.022373', 'step': 18448, 'epoch': 3} {'type': 'loss', 'content': 0.05666918680071831, 'timestamp': '2025-10-01 04:35:41.024536', 'step': 18449, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:41.054912', 'step': 18449, 'epoch': 3} {'type': 'loss', 'content': 0.07222471386194229, 'timestamp': '2025-10-01 04:35:41.058169', 'step': 18450, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:35:41.088566', 'step': 18450, 'epoch': 3} {'type': 'loss', 'content': 0.027496257796883583, 'timestamp': '2025-10-01 04:35:41.090765', 'step': 18451, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:41.121959', 'step': 18451, 'epoch': 3} {'type': 'loss', 'content': 0.0588822141289711, 'timestamp': '2025-10-01 04:35:41.145581', 'step': 18452, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:41.176696', 'step': 18452, 'epoch': 3} {'type': 'loss', 'content': 0.1037728413939476, 'timestamp': '2025-10-01 04:35:41.179160', 'step': 18453, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:35:41.209504', 'step': 18453, 'epoch': 3} {'type': 'loss', 'content': 0.08042415976524353, 'timestamp': '2025-10-01 04:35:41.212057', 'step': 18454, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:41.251990', 'step': 18454, 'epoch': 3} {'type': 'loss', 'content': 0.04258943349123001, 'timestamp': '2025-10-01 04:35:41.254055', 'step': 18455, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:41.284689', 'step': 18455, 'epoch': 3} {'type': 'loss', 'content': 0.10744704306125641, 'timestamp': '2025-10-01 04:35:41.308521', 'step': 18456, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:41.339831', 'step': 18456, 'epoch': 3} {'type': 'loss', 'content': 0.056278377771377563, 'timestamp': '2025-10-01 04:35:41.342098', 'step': 18457, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:35:41.372367', 'step': 18457, 'epoch': 3} {'type': 'loss', 'content': 0.044332005083560944, 'timestamp': '2025-10-01 04:35:41.374521', 'step': 18458, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:35:41.404855', 'step': 18458, 'epoch': 3} {'type': 'loss', 'content': 0.10692262649536133, 'timestamp': '2025-10-01 04:35:41.407146', 'step': 18459, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:41.437626', 'step': 18459, 'epoch': 3} {'type': 'loss', 'content': 0.10106018930673599, 'timestamp': '2025-10-01 04:35:41.461344', 'step': 18460, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:35:41.492903', 'step': 18460, 'epoch': 3} {'type': 'loss', 'content': 0.08087458461523056, 'timestamp': '2025-10-01 04:35:41.495156', 'step': 18461, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:41.526159', 'step': 18461, 'epoch': 3} {'type': 'loss', 'content': 0.12324420362710953, 'timestamp': '2025-10-01 04:35:41.528457', 'step': 18462, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:35:41.560143', 'step': 18462, 'epoch': 3} {'type': 'loss', 'content': 0.05804562568664551, 'timestamp': '2025-10-01 04:35:41.563538', 'step': 18463, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:41.594997', 'step': 18463, 'epoch': 3} {'type': 'loss', 'content': 0.06124546751379967, 'timestamp': '2025-10-01 04:35:41.619358', 'step': 18464, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:41.650621', 'step': 18464, 'epoch': 3} {'type': 'loss', 'content': 0.06766461580991745, 'timestamp': '2025-10-01 04:35:41.676495', 'step': 18465, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:41.707731', 'step': 18465, 'epoch': 3} {'type': 'loss', 'content': 0.1136232390999794, 'timestamp': '2025-10-01 04:35:41.710034', 'step': 18466, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:35:41.742007', 'step': 18466, 'epoch': 3} {'type': 'loss', 'content': 0.06443814188241959, 'timestamp': '2025-10-01 04:35:41.746348', 'step': 18467, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:35:41.777828', 'step': 18467, 'epoch': 3} {'type': 'loss', 'content': 0.055148303508758545, 'timestamp': '2025-10-01 04:35:41.801455', 'step': 18468, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:35:41.832820', 'step': 18468, 'epoch': 3} {'type': 'loss', 'content': 0.03504500538110733, 'timestamp': '2025-10-01 04:35:41.835034', 'step': 18469, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:41.865218', 'step': 18469, 'epoch': 3} {'type': 'loss', 'content': 0.06785993278026581, 'timestamp': '2025-10-01 04:35:41.867362', 'step': 18470, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:41.897418', 'step': 18470, 'epoch': 3} {'type': 'loss', 'content': 0.053989797830581665, 'timestamp': '2025-10-01 04:35:41.900047', 'step': 18471, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:35:41.931171', 'step': 18471, 'epoch': 3} {'type': 'loss', 'content': 0.044101204723119736, 'timestamp': '2025-10-01 04:35:41.955026', 'step': 18472, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:41.985985', 'step': 18472, 'epoch': 3} {'type': 'loss', 'content': 0.049021802842617035, 'timestamp': '2025-10-01 04:35:41.988093', 'step': 18473, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:35:42.019234', 'step': 18473, 'epoch': 3} {'type': 'loss', 'content': 0.0588376447558403, 'timestamp': '2025-10-01 04:35:42.021687', 'step': 18474, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:42.052412', 'step': 18474, 'epoch': 3} {'type': 'loss', 'content': 0.05647308751940727, 'timestamp': '2025-10-01 04:35:42.054707', 'step': 18475, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:42.085737', 'step': 18475, 'epoch': 3} {'type': 'loss', 'content': 0.03347429260611534, 'timestamp': '2025-10-01 04:35:42.109500', 'step': 18476, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:42.139800', 'step': 18476, 'epoch': 3} {'type': 'loss', 'content': 0.06885280460119247, 'timestamp': '2025-10-01 04:35:42.141960', 'step': 18477, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:42.171802', 'step': 18477, 'epoch': 3} {'type': 'loss', 'content': 0.06823798269033432, 'timestamp': '2025-10-01 04:35:42.174062', 'step': 18478, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:42.205313', 'step': 18478, 'epoch': 3} {'type': 'loss', 'content': 0.10038872808218002, 'timestamp': '2025-10-01 04:35:42.207445', 'step': 18479, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:42.237758', 'step': 18479, 'epoch': 3} {'type': 'loss', 'content': 0.013165728189051151, 'timestamp': '2025-10-01 04:35:42.262049', 'step': 18480, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:35:42.294242', 'step': 18480, 'epoch': 3} {'type': 'loss', 'content': 0.09088708460330963, 'timestamp': '2025-10-01 04:35:42.296405', 'step': 18481, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:35:42.328941', 'step': 18481, 'epoch': 3} {'type': 'loss', 'content': 0.117446668446064, 'timestamp': '2025-10-01 04:35:42.331132', 'step': 18482, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:35:42.362272', 'step': 18482, 'epoch': 3} {'type': 'loss', 'content': 0.05945177376270294, 'timestamp': '2025-10-01 04:35:42.365158', 'step': 18483, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:42.395725', 'step': 18483, 'epoch': 3} {'type': 'loss', 'content': 0.05759593844413757, 'timestamp': '2025-10-01 04:35:42.419782', 'step': 18484, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:42.451344', 'step': 18484, 'epoch': 3} {'type': 'loss', 'content': 0.08357677608728409, 'timestamp': '2025-10-01 04:35:42.453420', 'step': 18485, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:35:42.486181', 'step': 18485, 'epoch': 3} {'type': 'loss', 'content': 0.11083459854125977, 'timestamp': '2025-10-01 04:35:42.488776', 'step': 18486, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:42.520377', 'step': 18486, 'epoch': 3} {'type': 'loss', 'content': 0.020464861765503883, 'timestamp': '2025-10-01 04:35:42.522509', 'step': 18487, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:42.568519', 'step': 18487, 'epoch': 3} {'type': 'loss', 'content': 0.05291818082332611, 'timestamp': '2025-10-01 04:35:42.592595', 'step': 18488, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:35:42.625574', 'step': 18488, 'epoch': 3} {'type': 'loss', 'content': 0.06210942566394806, 'timestamp': '2025-10-01 04:35:42.628531', 'step': 18489, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:42.662864', 'step': 18489, 'epoch': 3} {'type': 'loss', 'content': 0.10306109488010406, 'timestamp': '2025-10-01 04:35:42.666329', 'step': 18490, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:35:42.697893', 'step': 18490, 'epoch': 3} {'type': 'loss', 'content': 0.1137305498123169, 'timestamp': '2025-10-01 04:35:42.700435', 'step': 18491, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:35:42.732831', 'step': 18491, 'epoch': 3} {'type': 'loss', 'content': 0.10195893049240112, 'timestamp': '2025-10-01 04:35:42.756978', 'step': 18492, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:35:42.788367', 'step': 18492, 'epoch': 3} {'type': 'loss', 'content': 0.13070353865623474, 'timestamp': '2025-10-01 04:35:42.790959', 'step': 18493, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:42.822351', 'step': 18493, 'epoch': 3} {'type': 'loss', 'content': 0.0839628130197525, 'timestamp': '2025-10-01 04:35:42.825034', 'step': 18494, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:35:42.855996', 'step': 18494, 'epoch': 3} {'type': 'loss', 'content': 0.07195261865854263, 'timestamp': '2025-10-01 04:35:42.858127', 'step': 18495, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:42.889346', 'step': 18495, 'epoch': 3} {'type': 'loss', 'content': 0.08281288295984268, 'timestamp': '2025-10-01 04:35:42.913303', 'step': 18496, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:35:42.943806', 'step': 18496, 'epoch': 3} {'type': 'loss', 'content': 0.08504489064216614, 'timestamp': '2025-10-01 04:35:42.946245', 'step': 18497, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:35:42.980392', 'step': 18497, 'epoch': 3} {'type': 'loss', 'content': 0.07758694887161255, 'timestamp': '2025-10-01 04:35:42.982917', 'step': 18498, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:35:43.013269', 'step': 18498, 'epoch': 3} {'type': 'loss', 'content': 0.0491173192858696, 'timestamp': '2025-10-01 04:35:43.016118', 'step': 18499, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:43.046597', 'step': 18499, 'epoch': 3} {'type': 'loss', 'content': 0.0473812036216259, 'timestamp': '2025-10-01 04:35:43.070248', 'step': 18500, 'epoch': 3} {'type': 'info', 'content': 'Checkpoint saved at step 18500', 'timestamp': '2025-10-01 04:35:47.936578', 'step': 18500, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:47.968447', 'step': 18500, 'epoch': 3} {'type': 'loss', 'content': 0.07289090007543564, 'timestamp': '2025-10-01 04:35:47.972272', 'step': 18501, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:35:48.015735', 'step': 18501, 'epoch': 3} {'type': 'loss', 'content': 0.07243359088897705, 'timestamp': '2025-10-01 04:35:48.019350', 'step': 18502, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:48.057949', 'step': 18502, 'epoch': 3} {'type': 'loss', 'content': 0.055026374757289886, 'timestamp': '2025-10-01 04:35:48.060109', 'step': 18503, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:35:48.092148', 'step': 18503, 'epoch': 3} {'type': 'loss', 'content': 0.07004968076944351, 'timestamp': '2025-10-01 04:35:48.115983', 'step': 18504, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:48.149578', 'step': 18504, 'epoch': 3} {'type': 'loss', 'content': 0.09373404085636139, 'timestamp': '2025-10-01 04:35:48.151960', 'step': 18505, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:48.187803', 'step': 18505, 'epoch': 3} {'type': 'loss', 'content': 0.1378157138824463, 'timestamp': '2025-10-01 04:35:48.189912', 'step': 18506, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:48.236616', 'step': 18506, 'epoch': 3} {'type': 'loss', 'content': 0.022835001349449158, 'timestamp': '2025-10-01 04:35:48.240160', 'step': 18507, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:48.285454', 'step': 18507, 'epoch': 3} {'type': 'loss', 'content': 0.06723052263259888, 'timestamp': '2025-10-01 04:35:48.309738', 'step': 18508, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:48.360923', 'step': 18508, 'epoch': 3} {'type': 'loss', 'content': 0.09823175519704819, 'timestamp': '2025-10-01 04:35:48.363095', 'step': 18509, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:48.398688', 'step': 18509, 'epoch': 3} {'type': 'loss', 'content': 0.07460460066795349, 'timestamp': '2025-10-01 04:35:48.401574', 'step': 18510, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:48.436602', 'step': 18510, 'epoch': 3} {'type': 'loss', 'content': 0.03256434574723244, 'timestamp': '2025-10-01 04:35:48.440519', 'step': 18511, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:48.494965', 'step': 18511, 'epoch': 3} {'type': 'loss', 'content': 0.03740009665489197, 'timestamp': '2025-10-01 04:35:48.518516', 'step': 18512, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:35:48.555189', 'step': 18512, 'epoch': 3} {'type': 'loss', 'content': 0.04027382284402847, 'timestamp': '2025-10-01 04:35:48.557116', 'step': 18513, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:48.600387', 'step': 18513, 'epoch': 3} {'type': 'loss', 'content': 0.11399557441473007, 'timestamp': '2025-10-01 04:35:48.602580', 'step': 18514, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:35:48.637237', 'step': 18514, 'epoch': 3} {'type': 'loss', 'content': 0.0482681505382061, 'timestamp': '2025-10-01 04:35:48.639370', 'step': 18515, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:35:48.686059', 'step': 18515, 'epoch': 3} {'type': 'loss', 'content': 0.04261159896850586, 'timestamp': '2025-10-01 04:35:48.709762', 'step': 18516, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:35:48.746369', 'step': 18516, 'epoch': 3} {'type': 'loss', 'content': 0.07150469720363617, 'timestamp': '2025-10-01 04:35:48.748437', 'step': 18517, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:35:48.785222', 'step': 18517, 'epoch': 3} {'type': 'loss', 'content': 0.11176647990942001, 'timestamp': '2025-10-01 04:35:48.788105', 'step': 18518, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:48.827134', 'step': 18518, 'epoch': 3} {'type': 'loss', 'content': 0.034907713532447815, 'timestamp': '2025-10-01 04:35:48.829400', 'step': 18519, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:48.871845', 'step': 18519, 'epoch': 3} {'type': 'loss', 'content': 0.060977254062891006, 'timestamp': '2025-10-01 04:35:48.895318', 'step': 18520, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:48.927843', 'step': 18520, 'epoch': 3} {'type': 'loss', 'content': 0.09603184461593628, 'timestamp': '2025-10-01 04:35:48.929995', 'step': 18521, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:48.962026', 'step': 18521, 'epoch': 3} {'type': 'loss', 'content': 0.029429662972688675, 'timestamp': '2025-10-01 04:35:48.964081', 'step': 18522, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:35:48.998404', 'step': 18522, 'epoch': 3} {'type': 'loss', 'content': 0.022867068648338318, 'timestamp': '2025-10-01 04:35:49.000635', 'step': 18523, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:35:49.032344', 'step': 18523, 'epoch': 3} {'type': 'loss', 'content': 0.06669007986783981, 'timestamp': '2025-10-01 04:35:49.055709', 'step': 18524, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:49.087372', 'step': 18524, 'epoch': 3} {'type': 'loss', 'content': 0.047523755580186844, 'timestamp': '2025-10-01 04:35:49.089564', 'step': 18525, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:35:49.121138', 'step': 18525, 'epoch': 3} {'type': 'loss', 'content': 0.04061717540025711, 'timestamp': '2025-10-01 04:35:49.123449', 'step': 18526, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:49.153722', 'step': 18526, 'epoch': 3} {'type': 'loss', 'content': 0.14965026080608368, 'timestamp': '2025-10-01 04:35:49.155855', 'step': 18527, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:49.186283', 'step': 18527, 'epoch': 3} {'type': 'loss', 'content': 0.07507802546024323, 'timestamp': '2025-10-01 04:35:49.210005', 'step': 18528, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:49.240120', 'step': 18528, 'epoch': 3} {'type': 'loss', 'content': 0.05380253493785858, 'timestamp': '2025-10-01 04:35:49.242321', 'step': 18529, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:35:49.272561', 'step': 18529, 'epoch': 3} {'type': 'loss', 'content': 0.06147032976150513, 'timestamp': '2025-10-01 04:35:49.274442', 'step': 18530, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:49.304994', 'step': 18530, 'epoch': 3} {'type': 'loss', 'content': 0.1088372990489006, 'timestamp': '2025-10-01 04:35:49.307147', 'step': 18531, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:49.339190', 'step': 18531, 'epoch': 3} {'type': 'loss', 'content': 0.09851880371570587, 'timestamp': '2025-10-01 04:35:49.369637', 'step': 18532, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:35:49.399629', 'step': 18532, 'epoch': 3} {'type': 'loss', 'content': 0.07678907364606857, 'timestamp': '2025-10-01 04:35:49.401643', 'step': 18533, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:49.432707', 'step': 18533, 'epoch': 3} {'type': 'loss', 'content': 0.16276365518569946, 'timestamp': '2025-10-01 04:35:49.434775', 'step': 18534, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:35:49.465629', 'step': 18534, 'epoch': 3} {'type': 'loss', 'content': 0.04793133959174156, 'timestamp': '2025-10-01 04:35:49.468055', 'step': 18535, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:49.499852', 'step': 18535, 'epoch': 3} {'type': 'loss', 'content': 0.12280367314815521, 'timestamp': '2025-10-01 04:35:49.523402', 'step': 18536, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:35:49.554379', 'step': 18536, 'epoch': 3} {'type': 'loss', 'content': 0.06979099661111832, 'timestamp': '2025-10-01 04:35:49.556537', 'step': 18537, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:35:49.587627', 'step': 18537, 'epoch': 3} {'type': 'loss', 'content': 0.07547764480113983, 'timestamp': '2025-10-01 04:35:49.589746', 'step': 18538, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:49.621563', 'step': 18538, 'epoch': 3} {'type': 'loss', 'content': 0.040836088359355927, 'timestamp': '2025-10-01 04:35:49.623663', 'step': 18539, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:35:49.654516', 'step': 18539, 'epoch': 3} {'type': 'loss', 'content': 0.09548123925924301, 'timestamp': '2025-10-01 04:35:49.678103', 'step': 18540, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:49.709227', 'step': 18540, 'epoch': 3} {'type': 'loss', 'content': 0.07038507610559464, 'timestamp': '2025-10-01 04:35:49.711463', 'step': 18541, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:35:49.742284', 'step': 18541, 'epoch': 3} {'type': 'loss', 'content': 0.056047871708869934, 'timestamp': '2025-10-01 04:35:49.745116', 'step': 18542, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:35:49.775202', 'step': 18542, 'epoch': 3} {'type': 'loss', 'content': 0.09124965220689774, 'timestamp': '2025-10-01 04:35:49.777514', 'step': 18543, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:49.808880', 'step': 18543, 'epoch': 3} {'type': 'loss', 'content': 0.11273503303527832, 'timestamp': '2025-10-01 04:35:49.832745', 'step': 18544, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:49.863574', 'step': 18544, 'epoch': 3} {'type': 'loss', 'content': 0.06953571736812592, 'timestamp': '2025-10-01 04:35:49.865822', 'step': 18545, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:49.901850', 'step': 18545, 'epoch': 3} {'type': 'loss', 'content': 0.017668569460511208, 'timestamp': '2025-10-01 04:35:49.904038', 'step': 18546, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:49.941589', 'step': 18546, 'epoch': 3} {'type': 'loss', 'content': 0.017532749101519585, 'timestamp': '2025-10-01 04:35:49.943767', 'step': 18547, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:35:49.974788', 'step': 18547, 'epoch': 3} {'type': 'loss', 'content': 0.03257621452212334, 'timestamp': '2025-10-01 04:35:49.998539', 'step': 18548, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:35:50.030795', 'step': 18548, 'epoch': 3} {'type': 'loss', 'content': 0.1375826895236969, 'timestamp': '2025-10-01 04:35:50.033010', 'step': 18549, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:50.065702', 'step': 18549, 'epoch': 3} {'type': 'loss', 'content': 0.11136017739772797, 'timestamp': '2025-10-01 04:35:50.067943', 'step': 18550, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:35:50.099561', 'step': 18550, 'epoch': 3} {'type': 'loss', 'content': 0.07163643091917038, 'timestamp': '2025-10-01 04:35:50.101660', 'step': 18551, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:50.132000', 'step': 18551, 'epoch': 3} {'type': 'loss', 'content': 0.11443007737398148, 'timestamp': '2025-10-01 04:35:50.155813', 'step': 18552, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:50.186033', 'step': 18552, 'epoch': 3} {'type': 'loss', 'content': 0.08163369446992874, 'timestamp': '2025-10-01 04:35:50.188233', 'step': 18553, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:50.218541', 'step': 18553, 'epoch': 3} {'type': 'loss', 'content': 0.08866599947214127, 'timestamp': '2025-10-01 04:35:50.220933', 'step': 18554, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:35:50.252105', 'step': 18554, 'epoch': 3} {'type': 'loss', 'content': 0.028808509930968285, 'timestamp': '2025-10-01 04:35:50.255386', 'step': 18555, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:50.286138', 'step': 18555, 'epoch': 3} {'type': 'loss', 'content': 0.10571084171533585, 'timestamp': '2025-10-01 04:35:50.309517', 'step': 18556, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:35:50.344305', 'step': 18556, 'epoch': 3} {'type': 'loss', 'content': 0.04818149283528328, 'timestamp': '2025-10-01 04:35:50.346163', 'step': 18557, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:50.376865', 'step': 18557, 'epoch': 3} {'type': 'loss', 'content': 0.07741578668355942, 'timestamp': '2025-10-01 04:35:50.379378', 'step': 18558, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:35:50.409992', 'step': 18558, 'epoch': 3} {'type': 'loss', 'content': 0.07499588280916214, 'timestamp': '2025-10-01 04:35:50.412670', 'step': 18559, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:35:50.444979', 'step': 18559, 'epoch': 3} {'type': 'loss', 'content': 0.09517288208007812, 'timestamp': '2025-10-01 04:35:50.468852', 'step': 18560, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:35:50.499539', 'step': 18560, 'epoch': 3} {'type': 'loss', 'content': 0.07803619652986526, 'timestamp': '2025-10-01 04:35:50.501982', 'step': 18561, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:50.536614', 'step': 18561, 'epoch': 3} {'type': 'loss', 'content': 0.04694676026701927, 'timestamp': '2025-10-01 04:35:50.538733', 'step': 18562, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:50.570073', 'step': 18562, 'epoch': 3} {'type': 'loss', 'content': 0.05864843353629112, 'timestamp': '2025-10-01 04:35:50.572443', 'step': 18563, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:35:50.603495', 'step': 18563, 'epoch': 3} {'type': 'loss', 'content': 0.04669773206114769, 'timestamp': '2025-10-01 04:35:50.627495', 'step': 18564, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:35:50.658271', 'step': 18564, 'epoch': 3} {'type': 'loss', 'content': 0.049505770206451416, 'timestamp': '2025-10-01 04:35:50.660551', 'step': 18565, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:35:50.692334', 'step': 18565, 'epoch': 3} {'type': 'loss', 'content': 0.08131899684667587, 'timestamp': '2025-10-01 04:35:50.694761', 'step': 18566, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:50.725674', 'step': 18566, 'epoch': 3} {'type': 'loss', 'content': 0.018561216071248055, 'timestamp': '2025-10-01 04:35:50.727744', 'step': 18567, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:35:50.758406', 'step': 18567, 'epoch': 3} {'type': 'loss', 'content': 0.12867800891399384, 'timestamp': '2025-10-01 04:35:50.782086', 'step': 18568, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:50.812866', 'step': 18568, 'epoch': 3} {'type': 'loss', 'content': 0.0773749053478241, 'timestamp': '2025-10-01 04:35:50.814796', 'step': 18569, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:35:50.853430', 'step': 18569, 'epoch': 3} {'type': 'loss', 'content': 0.11246861517429352, 'timestamp': '2025-10-01 04:35:50.855890', 'step': 18570, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:50.892609', 'step': 18570, 'epoch': 3} {'type': 'loss', 'content': 0.10675130784511566, 'timestamp': '2025-10-01 04:35:50.895077', 'step': 18571, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 10441544708224}, 'timestamp': '2025-10-01 04:35:50.931654', 'step': 18571, 'epoch': 3} {'type': 'loss', 'content': 0.04712593927979469, 'timestamp': '2025-10-01 04:35:50.966372', 'step': 18572, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:50.996821', 'step': 18572, 'epoch': 3} {'type': 'loss', 'content': 0.026016367599368095, 'timestamp': '2025-10-01 04:35:50.999403', 'step': 18573, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:51.030534', 'step': 18573, 'epoch': 3} {'type': 'loss', 'content': 0.08223949372768402, 'timestamp': '2025-10-01 04:35:51.032767', 'step': 18574, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:51.063355', 'step': 18574, 'epoch': 3} {'type': 'loss', 'content': 0.05998843535780907, 'timestamp': '2025-10-01 04:35:51.066009', 'step': 18575, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:51.096732', 'step': 18575, 'epoch': 3} {'type': 'loss', 'content': 0.10923801362514496, 'timestamp': '2025-10-01 04:35:51.120311', 'step': 18576, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:51.150710', 'step': 18576, 'epoch': 3} {'type': 'loss', 'content': 0.1219358742237091, 'timestamp': '2025-10-01 04:35:51.153036', 'step': 18577, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:51.183249', 'step': 18577, 'epoch': 3} {'type': 'loss', 'content': 0.036364760249853134, 'timestamp': '2025-10-01 04:35:51.185507', 'step': 18578, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:51.225215', 'step': 18578, 'epoch': 3} {'type': 'loss', 'content': 0.03432898223400116, 'timestamp': '2025-10-01 04:35:51.227325', 'step': 18579, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:35:51.258046', 'step': 18579, 'epoch': 3} {'type': 'loss', 'content': 0.15225115418434143, 'timestamp': '2025-10-01 04:35:51.281425', 'step': 18580, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:51.312184', 'step': 18580, 'epoch': 3} {'type': 'loss', 'content': 0.08170429617166519, 'timestamp': '2025-10-01 04:35:51.314101', 'step': 18581, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:35:51.343952', 'step': 18581, 'epoch': 3} {'type': 'loss', 'content': 0.09254547953605652, 'timestamp': '2025-10-01 04:35:51.346246', 'step': 18582, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:51.377081', 'step': 18582, 'epoch': 3} {'type': 'loss', 'content': 0.18662673234939575, 'timestamp': '2025-10-01 04:35:51.379598', 'step': 18583, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:51.410434', 'step': 18583, 'epoch': 3} {'type': 'loss', 'content': 0.06137026846408844, 'timestamp': '2025-10-01 04:35:51.434227', 'step': 18584, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:35:51.464878', 'step': 18584, 'epoch': 3} {'type': 'loss', 'content': 0.06310440599918365, 'timestamp': '2025-10-01 04:35:51.466979', 'step': 18585, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:35:51.497334', 'step': 18585, 'epoch': 3} {'type': 'loss', 'content': 0.10539615154266357, 'timestamp': '2025-10-01 04:35:51.499888', 'step': 18586, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:35:51.530551', 'step': 18586, 'epoch': 3} {'type': 'loss', 'content': 0.0702575296163559, 'timestamp': '2025-10-01 04:35:51.533103', 'step': 18587, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:51.563797', 'step': 18587, 'epoch': 3} {'type': 'loss', 'content': 0.03434992954134941, 'timestamp': '2025-10-01 04:35:51.587753', 'step': 18588, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:35:51.618220', 'step': 18588, 'epoch': 3} {'type': 'loss', 'content': 0.02686307206749916, 'timestamp': '2025-10-01 04:35:51.620791', 'step': 18589, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:35:51.651331', 'step': 18589, 'epoch': 3} {'type': 'loss', 'content': 0.08425439894199371, 'timestamp': '2025-10-01 04:35:51.654020', 'step': 18590, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:51.685420', 'step': 18590, 'epoch': 3} {'type': 'loss', 'content': 0.054520078003406525, 'timestamp': '2025-10-01 04:35:51.687699', 'step': 18591, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:35:51.718678', 'step': 18591, 'epoch': 3} {'type': 'loss', 'content': 0.1303255409002304, 'timestamp': '2025-10-01 04:35:51.742037', 'step': 18592, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:51.772306', 'step': 18592, 'epoch': 3} {'type': 'loss', 'content': 0.02831650711596012, 'timestamp': '2025-10-01 04:35:51.774252', 'step': 18593, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:35:51.804159', 'step': 18593, 'epoch': 3} {'type': 'loss', 'content': 0.08193174004554749, 'timestamp': '2025-10-01 04:35:51.806399', 'step': 18594, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:51.837017', 'step': 18594, 'epoch': 3} {'type': 'loss', 'content': 0.14443883299827576, 'timestamp': '2025-10-01 04:35:51.839097', 'step': 18595, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:35:51.869519', 'step': 18595, 'epoch': 3} {'type': 'loss', 'content': 0.034441154450178146, 'timestamp': '2025-10-01 04:35:51.893371', 'step': 18596, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:35:51.923910', 'step': 18596, 'epoch': 3} {'type': 'loss', 'content': 0.05996587127447128, 'timestamp': '2025-10-01 04:35:51.926158', 'step': 18597, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:51.957441', 'step': 18597, 'epoch': 3} {'type': 'loss', 'content': 0.04844168573617935, 'timestamp': '2025-10-01 04:35:51.959419', 'step': 18598, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:51.989830', 'step': 18598, 'epoch': 3} {'type': 'loss', 'content': 0.073968306183815, 'timestamp': '2025-10-01 04:35:51.992897', 'step': 18599, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:52.022994', 'step': 18599, 'epoch': 3} {'type': 'loss', 'content': 0.13412274420261383, 'timestamp': '2025-10-01 04:35:52.047456', 'step': 18600, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-10-01 04:35:52.077844', 'step': 18600, 'epoch': 3} {'type': 'loss', 'content': 0.062425702810287476, 'timestamp': '2025-10-01 04:35:52.080473', 'step': 18601, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:52.111000', 'step': 18601, 'epoch': 3} {'type': 'loss', 'content': 0.03129499405622482, 'timestamp': '2025-10-01 04:35:52.113120', 'step': 18602, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:52.143598', 'step': 18602, 'epoch': 3} {'type': 'loss', 'content': 0.10779614746570587, 'timestamp': '2025-10-01 04:35:52.145787', 'step': 18603, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:52.175622', 'step': 18603, 'epoch': 3} {'type': 'loss', 'content': 0.10280098021030426, 'timestamp': '2025-10-01 04:35:52.199548', 'step': 18604, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:35:52.248600', 'step': 18604, 'epoch': 3} {'type': 'loss', 'content': 0.07456447929143906, 'timestamp': '2025-10-01 04:35:52.250876', 'step': 18605, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:52.301136', 'step': 18605, 'epoch': 3} {'type': 'loss', 'content': 0.21429547667503357, 'timestamp': '2025-10-01 04:35:52.303296', 'step': 18606, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:52.354226', 'step': 18606, 'epoch': 3} {'type': 'loss', 'content': 0.020724374800920486, 'timestamp': '2025-10-01 04:35:52.356236', 'step': 18607, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:52.407352', 'step': 18607, 'epoch': 3} {'type': 'loss', 'content': 0.055091742426157, 'timestamp': '2025-10-01 04:35:52.431533', 'step': 18608, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:35:52.465242', 'step': 18608, 'epoch': 3} {'type': 'loss', 'content': 0.07428717613220215, 'timestamp': '2025-10-01 04:35:52.467580', 'step': 18609, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:35:52.506525', 'step': 18609, 'epoch': 3} {'type': 'loss', 'content': 0.10179563611745834, 'timestamp': '2025-10-01 04:35:52.509064', 'step': 18610, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:52.559024', 'step': 18610, 'epoch': 3} {'type': 'loss', 'content': 0.029438279569149017, 'timestamp': '2025-10-01 04:35:52.562619', 'step': 18611, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:35:52.619854', 'step': 18611, 'epoch': 3} {'type': 'loss', 'content': 0.05170990154147148, 'timestamp': '2025-10-01 04:35:52.643352', 'step': 18612, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:52.688371', 'step': 18612, 'epoch': 3} {'type': 'loss', 'content': 0.06404279172420502, 'timestamp': '2025-10-01 04:35:52.695916', 'step': 18613, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:52.764199', 'step': 18613, 'epoch': 3} {'type': 'loss', 'content': 0.17798471450805664, 'timestamp': '2025-10-01 04:35:52.766679', 'step': 18614, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:52.800954', 'step': 18614, 'epoch': 3} {'type': 'loss', 'content': 0.13510893285274506, 'timestamp': '2025-10-01 04:35:52.806472', 'step': 18615, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:52.848844', 'step': 18615, 'epoch': 3} {'type': 'loss', 'content': 0.03373921290040016, 'timestamp': '2025-10-01 04:35:52.882690', 'step': 18616, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:35:52.916044', 'step': 18616, 'epoch': 3} {'type': 'loss', 'content': 0.023626312613487244, 'timestamp': '2025-10-01 04:35:52.920647', 'step': 18617, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:52.968427', 'step': 18617, 'epoch': 3} {'type': 'loss', 'content': 0.02759942039847374, 'timestamp': '2025-10-01 04:35:52.976400', 'step': 18618, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:53.019549', 'step': 18618, 'epoch': 3} {'type': 'loss', 'content': 0.05517442151904106, 'timestamp': '2025-10-01 04:35:53.024038', 'step': 18619, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:35:53.062678', 'step': 18619, 'epoch': 3} {'type': 'loss', 'content': 0.038610413670539856, 'timestamp': '2025-10-01 04:35:53.086152', 'step': 18620, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:53.117808', 'step': 18620, 'epoch': 3} {'type': 'loss', 'content': 0.07667376101016998, 'timestamp': '2025-10-01 04:35:53.120231', 'step': 18621, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:53.151421', 'step': 18621, 'epoch': 3} {'type': 'loss', 'content': 0.06984192132949829, 'timestamp': '2025-10-01 04:35:53.153800', 'step': 18622, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:35:53.184690', 'step': 18622, 'epoch': 3} {'type': 'loss', 'content': 0.06213933229446411, 'timestamp': '2025-10-01 04:35:53.190403', 'step': 18623, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:53.236897', 'step': 18623, 'epoch': 3} {'type': 'loss', 'content': 0.06979216635227203, 'timestamp': '2025-10-01 04:35:53.260575', 'step': 18624, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:35:53.318360', 'step': 18624, 'epoch': 3} {'type': 'loss', 'content': 0.10170946270227432, 'timestamp': '2025-10-01 04:35:53.321103', 'step': 18625, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:53.366279', 'step': 18625, 'epoch': 3} {'type': 'loss', 'content': 0.05845477059483528, 'timestamp': '2025-10-01 04:35:53.368412', 'step': 18626, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:53.403374', 'step': 18626, 'epoch': 3} {'type': 'loss', 'content': 0.0951443463563919, 'timestamp': '2025-10-01 04:35:53.405401', 'step': 18627, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:53.473444', 'step': 18627, 'epoch': 3} {'type': 'loss', 'content': 0.1125025525689125, 'timestamp': '2025-10-01 04:35:53.497572', 'step': 18628, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:35:53.540379', 'step': 18628, 'epoch': 3} {'type': 'loss', 'content': 0.08438831567764282, 'timestamp': '2025-10-01 04:35:53.542444', 'step': 18629, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:35:53.578594', 'step': 18629, 'epoch': 3} {'type': 'loss', 'content': 0.031065471470355988, 'timestamp': '2025-10-01 04:35:53.580752', 'step': 18630, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:53.642184', 'step': 18630, 'epoch': 3} {'type': 'loss', 'content': 0.05326356366276741, 'timestamp': '2025-10-01 04:35:53.644381', 'step': 18631, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:35:53.679012', 'step': 18631, 'epoch': 3} {'type': 'loss', 'content': 0.04836567863821983, 'timestamp': '2025-10-01 04:35:53.702713', 'step': 18632, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:53.732991', 'step': 18632, 'epoch': 3} {'type': 'loss', 'content': 0.0968269556760788, 'timestamp': '2025-10-01 04:35:53.735285', 'step': 18633, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:53.766957', 'step': 18633, 'epoch': 3} {'type': 'loss', 'content': 0.13688194751739502, 'timestamp': '2025-10-01 04:35:53.769454', 'step': 18634, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:35:53.807145', 'step': 18634, 'epoch': 3} {'type': 'loss', 'content': 0.10611092299222946, 'timestamp': '2025-10-01 04:35:53.809759', 'step': 18635, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:53.840627', 'step': 18635, 'epoch': 3} {'type': 'loss', 'content': 0.11758146435022354, 'timestamp': '2025-10-01 04:35:53.864515', 'step': 18636, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:53.898303', 'step': 18636, 'epoch': 3} {'type': 'loss', 'content': 0.0441434383392334, 'timestamp': '2025-10-01 04:35:53.900556', 'step': 18637, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:35:53.931202', 'step': 18637, 'epoch': 3} {'type': 'loss', 'content': 0.022154878824949265, 'timestamp': '2025-10-01 04:35:53.933423', 'step': 18638, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:35:53.983686', 'step': 18638, 'epoch': 3} {'type': 'loss', 'content': 0.09367996454238892, 'timestamp': '2025-10-01 04:35:53.985894', 'step': 18639, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:35:54.016299', 'step': 18639, 'epoch': 3} {'type': 'loss', 'content': 0.07075914740562439, 'timestamp': '2025-10-01 04:35:54.040148', 'step': 18640, 'epoch': 3} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 949202279808}], 'timestamp': '2025-10-01 04:36:02.517423', 'step': 18640, 'epoch': 3} {'type': 'pplx', 'content': 14735.545548537255, 'timestamp': '2025-10-01 04:36:02.520146', 'step': 18640, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:36:02.549378', 'step': 18640, 'epoch': 3} {'type': 'loss', 'content': 0.08318712562322617, 'timestamp': '2025-10-01 04:36:02.551864', 'step': 18641, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:02.583442', 'step': 18641, 'epoch': 3} {'type': 'loss', 'content': 0.02510487660765648, 'timestamp': '2025-10-01 04:36:02.591230', 'step': 18642, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:02.634607', 'step': 18642, 'epoch': 3} {'type': 'loss', 'content': 0.08156406134366989, 'timestamp': '2025-10-01 04:36:02.636991', 'step': 18643, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:02.667122', 'step': 18643, 'epoch': 3} {'type': 'loss', 'content': 0.060886748135089874, 'timestamp': '2025-10-01 04:36:02.690920', 'step': 18644, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:02.729273', 'step': 18644, 'epoch': 3} {'type': 'loss', 'content': 0.07696682959794998, 'timestamp': '2025-10-01 04:36:02.732436', 'step': 18645, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:36:02.765008', 'step': 18645, 'epoch': 3} {'type': 'loss', 'content': 0.1034868136048317, 'timestamp': '2025-10-01 04:36:02.767529', 'step': 18646, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:02.798496', 'step': 18646, 'epoch': 3} {'type': 'loss', 'content': 0.10627423226833344, 'timestamp': '2025-10-01 04:36:02.801274', 'step': 18647, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:02.832219', 'step': 18647, 'epoch': 3} {'type': 'loss', 'content': 0.05950426310300827, 'timestamp': '2025-10-01 04:36:02.856725', 'step': 18648, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:36:02.888369', 'step': 18648, 'epoch': 3} {'type': 'loss', 'content': 0.017766520380973816, 'timestamp': '2025-10-01 04:36:02.891064', 'step': 18649, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:36:02.922804', 'step': 18649, 'epoch': 3} {'type': 'loss', 'content': 0.1210094466805458, 'timestamp': '2025-10-01 04:36:02.925168', 'step': 18650, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:02.962468', 'step': 18650, 'epoch': 3} {'type': 'loss', 'content': 0.08021334558725357, 'timestamp': '2025-10-01 04:36:02.967690', 'step': 18651, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:02.998659', 'step': 18651, 'epoch': 3} {'type': 'loss', 'content': 0.175408735871315, 'timestamp': '2025-10-01 04:36:03.027923', 'step': 18652, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:03.064911', 'step': 18652, 'epoch': 3} {'type': 'loss', 'content': 0.13759849965572357, 'timestamp': '2025-10-01 04:36:03.067655', 'step': 18653, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:03.112884', 'step': 18653, 'epoch': 3} {'type': 'loss', 'content': 0.06008436158299446, 'timestamp': '2025-10-01 04:36:03.114970', 'step': 18654, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:36:03.146025', 'step': 18654, 'epoch': 3} {'type': 'loss', 'content': 0.0537092350423336, 'timestamp': '2025-10-01 04:36:03.148136', 'step': 18655, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:03.179016', 'step': 18655, 'epoch': 3} {'type': 'loss', 'content': 0.08022122830152512, 'timestamp': '2025-10-01 04:36:03.209958', 'step': 18656, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:03.240820', 'step': 18656, 'epoch': 3} {'type': 'loss', 'content': 0.05126621574163437, 'timestamp': '2025-10-01 04:36:03.248007', 'step': 18657, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:03.278808', 'step': 18657, 'epoch': 3} {'type': 'loss', 'content': 0.006192552391439676, 'timestamp': '2025-10-01 04:36:03.282102', 'step': 18658, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:36:03.313304', 'step': 18658, 'epoch': 3} {'type': 'loss', 'content': 0.07384257763624191, 'timestamp': '2025-10-01 04:36:03.315596', 'step': 18659, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:36:03.346130', 'step': 18659, 'epoch': 3} {'type': 'loss', 'content': 0.07032644748687744, 'timestamp': '2025-10-01 04:36:03.369667', 'step': 18660, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:36:03.404223', 'step': 18660, 'epoch': 3} {'type': 'loss', 'content': 0.10599255561828613, 'timestamp': '2025-10-01 04:36:03.410803', 'step': 18661, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:36:03.442316', 'step': 18661, 'epoch': 3} {'type': 'loss', 'content': 0.03346339985728264, 'timestamp': '2025-10-01 04:36:03.444419', 'step': 18662, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:03.475460', 'step': 18662, 'epoch': 3} {'type': 'loss', 'content': 0.10859611630439758, 'timestamp': '2025-10-01 04:36:03.477624', 'step': 18663, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:36:03.507992', 'step': 18663, 'epoch': 3} {'type': 'loss', 'content': 0.10986305773258209, 'timestamp': '2025-10-01 04:36:03.542542', 'step': 18664, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:03.575069', 'step': 18664, 'epoch': 3} {'type': 'loss', 'content': 0.06832943111658096, 'timestamp': '2025-10-01 04:36:03.577238', 'step': 18665, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:03.608446', 'step': 18665, 'epoch': 3} {'type': 'loss', 'content': 0.08401674777269363, 'timestamp': '2025-10-01 04:36:03.617757', 'step': 18666, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:03.648329', 'step': 18666, 'epoch': 3} {'type': 'loss', 'content': 0.06944876909255981, 'timestamp': '2025-10-01 04:36:03.650596', 'step': 18667, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:03.681199', 'step': 18667, 'epoch': 3} {'type': 'loss', 'content': 0.06949426233768463, 'timestamp': '2025-10-01 04:36:03.707955', 'step': 18668, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:36:03.740215', 'step': 18668, 'epoch': 3} {'type': 'loss', 'content': 0.1137649416923523, 'timestamp': '2025-10-01 04:36:03.742399', 'step': 18669, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:03.772685', 'step': 18669, 'epoch': 3} {'type': 'loss', 'content': 0.08922438323497772, 'timestamp': '2025-10-01 04:36:03.775044', 'step': 18670, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:36:03.805705', 'step': 18670, 'epoch': 3} {'type': 'loss', 'content': 0.08317825943231583, 'timestamp': '2025-10-01 04:36:03.808119', 'step': 18671, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:03.838897', 'step': 18671, 'epoch': 3} {'type': 'loss', 'content': 0.02138800546526909, 'timestamp': '2025-10-01 04:36:03.863147', 'step': 18672, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:36:03.894035', 'step': 18672, 'epoch': 3} {'type': 'loss', 'content': 0.06476595252752304, 'timestamp': '2025-10-01 04:36:03.896412', 'step': 18673, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:36:03.927421', 'step': 18673, 'epoch': 3} {'type': 'loss', 'content': 0.06467543542385101, 'timestamp': '2025-10-01 04:36:03.929565', 'step': 18674, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:36:03.961582', 'step': 18674, 'epoch': 3} {'type': 'loss', 'content': 0.08853349834680557, 'timestamp': '2025-10-01 04:36:03.963834', 'step': 18675, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:03.994994', 'step': 18675, 'epoch': 3} {'type': 'loss', 'content': 0.05110441893339157, 'timestamp': '2025-10-01 04:36:04.018623', 'step': 18676, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:36:04.049366', 'step': 18676, 'epoch': 3} {'type': 'loss', 'content': 0.05661623552441597, 'timestamp': '2025-10-01 04:36:04.052015', 'step': 18677, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-10-01 04:36:04.082881', 'step': 18677, 'epoch': 3} {'type': 'loss', 'content': 0.09034920483827591, 'timestamp': '2025-10-01 04:36:04.087497', 'step': 18678, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:04.118962', 'step': 18678, 'epoch': 3} {'type': 'loss', 'content': 0.046965934336185455, 'timestamp': '2025-10-01 04:36:04.121130', 'step': 18679, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:04.152451', 'step': 18679, 'epoch': 3} {'type': 'loss', 'content': 0.03241552412509918, 'timestamp': '2025-10-01 04:36:04.176137', 'step': 18680, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:04.209353', 'step': 18680, 'epoch': 3} {'type': 'loss', 'content': 0.033728860318660736, 'timestamp': '2025-10-01 04:36:04.211635', 'step': 18681, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:36:04.243684', 'step': 18681, 'epoch': 3} {'type': 'loss', 'content': 0.08101695030927658, 'timestamp': '2025-10-01 04:36:04.250878', 'step': 18682, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:04.282900', 'step': 18682, 'epoch': 3} {'type': 'loss', 'content': 0.0521199107170105, 'timestamp': '2025-10-01 04:36:04.285164', 'step': 18683, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:36:04.316000', 'step': 18683, 'epoch': 3} {'type': 'loss', 'content': 0.05953186750411987, 'timestamp': '2025-10-01 04:36:04.340333', 'step': 18684, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:36:04.371846', 'step': 18684, 'epoch': 3} {'type': 'loss', 'content': 0.035658709704875946, 'timestamp': '2025-10-01 04:36:04.374682', 'step': 18685, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:36:04.407089', 'step': 18685, 'epoch': 3} {'type': 'loss', 'content': 0.06942100077867508, 'timestamp': '2025-10-01 04:36:04.409903', 'step': 18686, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:04.441143', 'step': 18686, 'epoch': 3} {'type': 'loss', 'content': 0.02347467839717865, 'timestamp': '2025-10-01 04:36:04.447331', 'step': 18687, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:04.478158', 'step': 18687, 'epoch': 3} {'type': 'loss', 'content': 0.09657822549343109, 'timestamp': '2025-10-01 04:36:04.503111', 'step': 18688, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:04.534542', 'step': 18688, 'epoch': 3} {'type': 'loss', 'content': 0.08798394352197647, 'timestamp': '2025-10-01 04:36:04.537193', 'step': 18689, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:36:04.568684', 'step': 18689, 'epoch': 3} {'type': 'loss', 'content': 0.045321013778448105, 'timestamp': '2025-10-01 04:36:04.571593', 'step': 18690, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:04.603423', 'step': 18690, 'epoch': 3} {'type': 'loss', 'content': 0.0339558944106102, 'timestamp': '2025-10-01 04:36:04.605990', 'step': 18691, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:04.637528', 'step': 18691, 'epoch': 3} {'type': 'loss', 'content': 0.08814415335655212, 'timestamp': '2025-10-01 04:36:04.661374', 'step': 18692, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:04.692207', 'step': 18692, 'epoch': 3} {'type': 'loss', 'content': 0.027188178151845932, 'timestamp': '2025-10-01 04:36:04.694889', 'step': 18693, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:36:04.725793', 'step': 18693, 'epoch': 3} {'type': 'loss', 'content': 0.11125452816486359, 'timestamp': '2025-10-01 04:36:04.728189', 'step': 18694, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:04.763106', 'step': 18694, 'epoch': 3} {'type': 'loss', 'content': 0.036108195781707764, 'timestamp': '2025-10-01 04:36:04.766169', 'step': 18695, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:04.798303', 'step': 18695, 'epoch': 3} {'type': 'loss', 'content': 0.023711133748292923, 'timestamp': '2025-10-01 04:36:04.822490', 'step': 18696, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:36:04.853997', 'step': 18696, 'epoch': 3} {'type': 'loss', 'content': 0.06345648318529129, 'timestamp': '2025-10-01 04:36:04.856475', 'step': 18697, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:04.887879', 'step': 18697, 'epoch': 3} {'type': 'loss', 'content': 0.07983065396547318, 'timestamp': '2025-10-01 04:36:04.890245', 'step': 18698, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:36:04.924573', 'step': 18698, 'epoch': 3} {'type': 'loss', 'content': 0.030875137075781822, 'timestamp': '2025-10-01 04:36:04.927477', 'step': 18699, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:36:04.960228', 'step': 18699, 'epoch': 3} {'type': 'loss', 'content': 0.05575624108314514, 'timestamp': '2025-10-01 04:36:04.984194', 'step': 18700, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:36:05.016769', 'step': 18700, 'epoch': 3} {'type': 'loss', 'content': 0.0449083037674427, 'timestamp': '2025-10-01 04:36:05.020223', 'step': 18701, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:05.051889', 'step': 18701, 'epoch': 3} {'type': 'loss', 'content': 0.044472359120845795, 'timestamp': '2025-10-01 04:36:05.054632', 'step': 18702, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:05.090206', 'step': 18702, 'epoch': 3} {'type': 'loss', 'content': 0.06641048192977905, 'timestamp': '2025-10-01 04:36:05.092863', 'step': 18703, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:05.123121', 'step': 18703, 'epoch': 3} {'type': 'loss', 'content': 0.04443296790122986, 'timestamp': '2025-10-01 04:36:05.147160', 'step': 18704, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:36:05.177942', 'step': 18704, 'epoch': 3} {'type': 'loss', 'content': 0.08117298781871796, 'timestamp': '2025-10-01 04:36:05.181285', 'step': 18705, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:05.213350', 'step': 18705, 'epoch': 3} {'type': 'loss', 'content': 0.02181069180369377, 'timestamp': '2025-10-01 04:36:05.216170', 'step': 18706, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:05.247160', 'step': 18706, 'epoch': 3} {'type': 'loss', 'content': 0.08692846447229385, 'timestamp': '2025-10-01 04:36:05.250103', 'step': 18707, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:36:05.282454', 'step': 18707, 'epoch': 3} {'type': 'loss', 'content': 0.11745541542768478, 'timestamp': '2025-10-01 04:36:05.307865', 'step': 18708, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:36:05.338410', 'step': 18708, 'epoch': 3} {'type': 'loss', 'content': 0.06474132835865021, 'timestamp': '2025-10-01 04:36:05.341225', 'step': 18709, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:36:05.372168', 'step': 18709, 'epoch': 3} {'type': 'loss', 'content': 0.030454983934760094, 'timestamp': '2025-10-01 04:36:05.374647', 'step': 18710, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:36:05.405595', 'step': 18710, 'epoch': 3} {'type': 'loss', 'content': 0.09249492734670639, 'timestamp': '2025-10-01 04:36:05.408215', 'step': 18711, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:05.439869', 'step': 18711, 'epoch': 3} {'type': 'loss', 'content': 0.04323970153927803, 'timestamp': '2025-10-01 04:36:05.463633', 'step': 18712, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:36:05.494701', 'step': 18712, 'epoch': 3} {'type': 'loss', 'content': 0.08170391619205475, 'timestamp': '2025-10-01 04:36:05.497141', 'step': 18713, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:36:05.528573', 'step': 18713, 'epoch': 3} {'type': 'loss', 'content': 0.09368939697742462, 'timestamp': '2025-10-01 04:36:05.532200', 'step': 18714, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:05.563291', 'step': 18714, 'epoch': 3} {'type': 'loss', 'content': 0.117198646068573, 'timestamp': '2025-10-01 04:36:05.566294', 'step': 18715, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:05.596909', 'step': 18715, 'epoch': 3} {'type': 'loss', 'content': 0.03648780286312103, 'timestamp': '2025-10-01 04:36:05.620808', 'step': 18716, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:05.656826', 'step': 18716, 'epoch': 3} {'type': 'loss', 'content': 0.016744030639529228, 'timestamp': '2025-10-01 04:36:05.661321', 'step': 18717, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:36:05.692769', 'step': 18717, 'epoch': 3} {'type': 'loss', 'content': 0.031608689576387405, 'timestamp': '2025-10-01 04:36:05.694701', 'step': 18718, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:05.730680', 'step': 18718, 'epoch': 3} {'type': 'loss', 'content': 0.11692804098129272, 'timestamp': '2025-10-01 04:36:05.732816', 'step': 18719, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:05.764091', 'step': 18719, 'epoch': 3} {'type': 'loss', 'content': 0.05745403841137886, 'timestamp': '2025-10-01 04:36:05.787755', 'step': 18720, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:05.818382', 'step': 18720, 'epoch': 3} {'type': 'loss', 'content': 0.03519515320658684, 'timestamp': '2025-10-01 04:36:05.820748', 'step': 18721, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:05.863330', 'step': 18721, 'epoch': 3} {'type': 'loss', 'content': 0.06329468637704849, 'timestamp': '2025-10-01 04:36:05.865679', 'step': 18722, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:05.897275', 'step': 18722, 'epoch': 3} {'type': 'loss', 'content': 0.049846064299345016, 'timestamp': '2025-10-01 04:36:05.899510', 'step': 18723, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:05.930202', 'step': 18723, 'epoch': 3} {'type': 'loss', 'content': 0.14375151693820953, 'timestamp': '2025-10-01 04:36:05.954037', 'step': 18724, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:05.985243', 'step': 18724, 'epoch': 3} {'type': 'loss', 'content': 0.022210150957107544, 'timestamp': '2025-10-01 04:36:05.988081', 'step': 18725, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:06.018760', 'step': 18725, 'epoch': 3} {'type': 'loss', 'content': 0.046012766659259796, 'timestamp': '2025-10-01 04:36:06.021006', 'step': 18726, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:06.051914', 'step': 18726, 'epoch': 3} {'type': 'loss', 'content': 0.05003329738974571, 'timestamp': '2025-10-01 04:36:06.054633', 'step': 18727, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:36:06.085564', 'step': 18727, 'epoch': 3} {'type': 'loss', 'content': 0.01393973734229803, 'timestamp': '2025-10-01 04:36:06.109838', 'step': 18728, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:06.142580', 'step': 18728, 'epoch': 3} {'type': 'loss', 'content': 0.09647781401872635, 'timestamp': '2025-10-01 04:36:06.144880', 'step': 18729, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:36:06.194094', 'step': 18729, 'epoch': 3} {'type': 'loss', 'content': 0.0649724155664444, 'timestamp': '2025-10-01 04:36:06.199552', 'step': 18730, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:36:06.230374', 'step': 18730, 'epoch': 3} {'type': 'loss', 'content': 0.059873033314943314, 'timestamp': '2025-10-01 04:36:06.233521', 'step': 18731, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:06.263824', 'step': 18731, 'epoch': 3} {'type': 'loss', 'content': 0.1115889698266983, 'timestamp': '2025-10-01 04:36:06.287643', 'step': 18732, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:06.319144', 'step': 18732, 'epoch': 3} {'type': 'loss', 'content': 0.06422403454780579, 'timestamp': '2025-10-01 04:36:06.321438', 'step': 18733, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:06.353643', 'step': 18733, 'epoch': 3} {'type': 'loss', 'content': 0.020465049892663956, 'timestamp': '2025-10-01 04:36:06.355898', 'step': 18734, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:06.387551', 'step': 18734, 'epoch': 3} {'type': 'loss', 'content': 0.04309644177556038, 'timestamp': '2025-10-01 04:36:06.389962', 'step': 18735, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:36:06.421179', 'step': 18735, 'epoch': 3} {'type': 'loss', 'content': 0.059076081961393356, 'timestamp': '2025-10-01 04:36:06.445025', 'step': 18736, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:06.476768', 'step': 18736, 'epoch': 3} {'type': 'loss', 'content': 0.09019901603460312, 'timestamp': '2025-10-01 04:36:06.481722', 'step': 18737, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:36:06.513613', 'step': 18737, 'epoch': 3} {'type': 'loss', 'content': 0.010951141826808453, 'timestamp': '2025-10-01 04:36:06.519130', 'step': 18738, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:36:06.549877', 'step': 18738, 'epoch': 3} {'type': 'loss', 'content': 0.1045798808336258, 'timestamp': '2025-10-01 04:36:06.552129', 'step': 18739, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:06.584144', 'step': 18739, 'epoch': 3} {'type': 'loss', 'content': 0.06268570572137833, 'timestamp': '2025-10-01 04:36:06.608049', 'step': 18740, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:06.639185', 'step': 18740, 'epoch': 3} {'type': 'loss', 'content': 0.07835037261247635, 'timestamp': '2025-10-01 04:36:06.641181', 'step': 18741, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:06.672817', 'step': 18741, 'epoch': 3} {'type': 'loss', 'content': 0.09069399535655975, 'timestamp': '2025-10-01 04:36:06.675216', 'step': 18742, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:06.706834', 'step': 18742, 'epoch': 3} {'type': 'loss', 'content': 0.01959090493619442, 'timestamp': '2025-10-01 04:36:06.709363', 'step': 18743, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:06.741030', 'step': 18743, 'epoch': 3} {'type': 'loss', 'content': 0.09180780500173569, 'timestamp': '2025-10-01 04:36:06.764852', 'step': 18744, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:06.796987', 'step': 18744, 'epoch': 3} {'type': 'loss', 'content': 0.04594281315803528, 'timestamp': '2025-10-01 04:36:06.800140', 'step': 18745, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:36:06.832086', 'step': 18745, 'epoch': 3} {'type': 'loss', 'content': 0.10822246223688126, 'timestamp': '2025-10-01 04:36:06.835029', 'step': 18746, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:06.867146', 'step': 18746, 'epoch': 3} {'type': 'loss', 'content': 0.02428230457007885, 'timestamp': '2025-10-01 04:36:06.869452', 'step': 18747, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:06.901967', 'step': 18747, 'epoch': 3} {'type': 'loss', 'content': 0.11376728862524033, 'timestamp': '2025-10-01 04:36:06.925827', 'step': 18748, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:36:06.956570', 'step': 18748, 'epoch': 3} {'type': 'loss', 'content': 0.06416994333267212, 'timestamp': '2025-10-01 04:36:06.958649', 'step': 18749, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:06.988694', 'step': 18749, 'epoch': 3} {'type': 'loss', 'content': 0.05789409205317497, 'timestamp': '2025-10-01 04:36:06.990944', 'step': 18750, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:07.021174', 'step': 18750, 'epoch': 3} {'type': 'loss', 'content': 0.03942503780126572, 'timestamp': '2025-10-01 04:36:07.023552', 'step': 18751, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:36:07.054443', 'step': 18751, 'epoch': 3} {'type': 'loss', 'content': 0.11022429913282394, 'timestamp': '2025-10-01 04:36:07.078302', 'step': 18752, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:07.108725', 'step': 18752, 'epoch': 3} {'type': 'loss', 'content': 0.07615623623132706, 'timestamp': '2025-10-01 04:36:07.111016', 'step': 18753, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:07.141669', 'step': 18753, 'epoch': 3} {'type': 'loss', 'content': 0.08933905512094498, 'timestamp': '2025-10-01 04:36:07.148155', 'step': 18754, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:07.178970', 'step': 18754, 'epoch': 3} {'type': 'loss', 'content': 0.05609450116753578, 'timestamp': '2025-10-01 04:36:07.180955', 'step': 18755, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:07.211233', 'step': 18755, 'epoch': 3} {'type': 'loss', 'content': 0.09090612828731537, 'timestamp': '2025-10-01 04:36:07.235013', 'step': 18756, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:07.268306', 'step': 18756, 'epoch': 3} {'type': 'loss', 'content': 0.06984084099531174, 'timestamp': '2025-10-01 04:36:07.282331', 'step': 18757, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:07.326598', 'step': 18757, 'epoch': 3} {'type': 'loss', 'content': 0.07079765200614929, 'timestamp': '2025-10-01 04:36:07.329047', 'step': 18758, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:07.360018', 'step': 18758, 'epoch': 3} {'type': 'loss', 'content': 0.03882722184062004, 'timestamp': '2025-10-01 04:36:07.362533', 'step': 18759, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:07.393418', 'step': 18759, 'epoch': 3} {'type': 'loss', 'content': 0.054233942180871964, 'timestamp': '2025-10-01 04:36:07.417221', 'step': 18760, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:36:07.450237', 'step': 18760, 'epoch': 3} {'type': 'loss', 'content': 0.06466063857078552, 'timestamp': '2025-10-01 04:36:07.452752', 'step': 18761, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:07.483893', 'step': 18761, 'epoch': 3} {'type': 'loss', 'content': 0.047947030514478683, 'timestamp': '2025-10-01 04:36:07.486331', 'step': 18762, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:07.517606', 'step': 18762, 'epoch': 3} {'type': 'loss', 'content': 0.010668878443539143, 'timestamp': '2025-10-01 04:36:07.519854', 'step': 18763, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:36:07.556575', 'step': 18763, 'epoch': 3} {'type': 'loss', 'content': 0.10450126975774765, 'timestamp': '2025-10-01 04:36:07.580430', 'step': 18764, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:07.613262', 'step': 18764, 'epoch': 3} {'type': 'loss', 'content': 0.11221696436405182, 'timestamp': '2025-10-01 04:36:07.615442', 'step': 18765, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:36:07.648200', 'step': 18765, 'epoch': 3} {'type': 'loss', 'content': 0.017535990104079247, 'timestamp': '2025-10-01 04:36:07.650608', 'step': 18766, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:07.683708', 'step': 18766, 'epoch': 3} {'type': 'loss', 'content': 0.11490114778280258, 'timestamp': '2025-10-01 04:36:07.686236', 'step': 18767, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:07.717711', 'step': 18767, 'epoch': 3} {'type': 'loss', 'content': 0.08998411148786545, 'timestamp': '2025-10-01 04:36:07.741447', 'step': 18768, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:07.779947', 'step': 18768, 'epoch': 3} {'type': 'loss', 'content': 0.08991535753011703, 'timestamp': '2025-10-01 04:36:07.782166', 'step': 18769, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:07.813643', 'step': 18769, 'epoch': 3} {'type': 'loss', 'content': 0.07047618925571442, 'timestamp': '2025-10-01 04:36:07.816155', 'step': 18770, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:07.849428', 'step': 18770, 'epoch': 3} {'type': 'loss', 'content': 0.0617719404399395, 'timestamp': '2025-10-01 04:36:07.853133', 'step': 18771, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:07.884702', 'step': 18771, 'epoch': 3} {'type': 'loss', 'content': 0.04858074337244034, 'timestamp': '2025-10-01 04:36:07.908552', 'step': 18772, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:07.943455', 'step': 18772, 'epoch': 3} {'type': 'loss', 'content': 0.10301697254180908, 'timestamp': '2025-10-01 04:36:07.945930', 'step': 18773, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:36:07.977295', 'step': 18773, 'epoch': 3} {'type': 'loss', 'content': 0.08693400770425797, 'timestamp': '2025-10-01 04:36:07.981712', 'step': 18774, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:36:08.015160', 'step': 18774, 'epoch': 3} {'type': 'loss', 'content': 0.100406214594841, 'timestamp': '2025-10-01 04:36:08.017382', 'step': 18775, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:08.048940', 'step': 18775, 'epoch': 3} {'type': 'loss', 'content': 0.10259804874658585, 'timestamp': '2025-10-01 04:36:08.073609', 'step': 18776, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:08.105683', 'step': 18776, 'epoch': 3} {'type': 'loss', 'content': 0.09047351032495499, 'timestamp': '2025-10-01 04:36:08.108727', 'step': 18777, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:08.139808', 'step': 18777, 'epoch': 3} {'type': 'loss', 'content': 0.05224267393350601, 'timestamp': '2025-10-01 04:36:08.143161', 'step': 18778, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:08.176496', 'step': 18778, 'epoch': 3} {'type': 'loss', 'content': 0.04468556120991707, 'timestamp': '2025-10-01 04:36:08.178866', 'step': 18779, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:36:08.215243', 'step': 18779, 'epoch': 3} {'type': 'loss', 'content': 0.054376520216464996, 'timestamp': '2025-10-01 04:36:08.240072', 'step': 18780, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:08.275507', 'step': 18780, 'epoch': 3} {'type': 'loss', 'content': 0.08663950115442276, 'timestamp': '2025-10-01 04:36:08.277843', 'step': 18781, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:08.309803', 'step': 18781, 'epoch': 3} {'type': 'loss', 'content': 0.07521422207355499, 'timestamp': '2025-10-01 04:36:08.312831', 'step': 18782, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:36:08.348373', 'step': 18782, 'epoch': 3} {'type': 'loss', 'content': 0.0784791111946106, 'timestamp': '2025-10-01 04:36:08.350608', 'step': 18783, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:36:08.382152', 'step': 18783, 'epoch': 3} {'type': 'loss', 'content': 0.04890887066721916, 'timestamp': '2025-10-01 04:36:08.405986', 'step': 18784, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:36:08.440164', 'step': 18784, 'epoch': 3} {'type': 'loss', 'content': 0.11009573191404343, 'timestamp': '2025-10-01 04:36:08.442484', 'step': 18785, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:36:08.479719', 'step': 18785, 'epoch': 3} {'type': 'loss', 'content': 0.15892146527767181, 'timestamp': '2025-10-01 04:36:08.482032', 'step': 18786, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:08.513344', 'step': 18786, 'epoch': 3} {'type': 'loss', 'content': 0.030277466401457787, 'timestamp': '2025-10-01 04:36:08.516270', 'step': 18787, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:36:08.548139', 'step': 18787, 'epoch': 3} {'type': 'loss', 'content': 0.03162175044417381, 'timestamp': '2025-10-01 04:36:08.572039', 'step': 18788, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:08.609941', 'step': 18788, 'epoch': 3} {'type': 'loss', 'content': 0.04383876919746399, 'timestamp': '2025-10-01 04:36:08.612218', 'step': 18789, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:08.643977', 'step': 18789, 'epoch': 3} {'type': 'loss', 'content': 0.07151176780462265, 'timestamp': '2025-10-01 04:36:08.646338', 'step': 18790, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:08.686887', 'step': 18790, 'epoch': 3} {'type': 'loss', 'content': 0.01365286111831665, 'timestamp': '2025-10-01 04:36:08.689139', 'step': 18791, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:36:08.720533', 'step': 18791, 'epoch': 3} {'type': 'loss', 'content': 0.0726797878742218, 'timestamp': '2025-10-01 04:36:08.744406', 'step': 18792, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:36:08.789904', 'step': 18792, 'epoch': 3} {'type': 'loss', 'content': 0.10658380389213562, 'timestamp': '2025-10-01 04:36:08.792303', 'step': 18793, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:36:08.832786', 'step': 18793, 'epoch': 3} {'type': 'loss', 'content': 0.07844871282577515, 'timestamp': '2025-10-01 04:36:08.835440', 'step': 18794, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:36:08.868620', 'step': 18794, 'epoch': 3} {'type': 'loss', 'content': 0.037167441099882126, 'timestamp': '2025-10-01 04:36:08.871421', 'step': 18795, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:36:08.904737', 'step': 18795, 'epoch': 3} {'type': 'loss', 'content': 0.031013866886496544, 'timestamp': '2025-10-01 04:36:08.928505', 'step': 18796, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:08.970799', 'step': 18796, 'epoch': 3} {'type': 'loss', 'content': 0.02102081850171089, 'timestamp': '2025-10-01 04:36:08.972896', 'step': 18797, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:09.006567', 'step': 18797, 'epoch': 3} {'type': 'loss', 'content': 0.04213981702923775, 'timestamp': '2025-10-01 04:36:09.008896', 'step': 18798, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:09.041126', 'step': 18798, 'epoch': 3} {'type': 'loss', 'content': 0.10420308262109756, 'timestamp': '2025-10-01 04:36:09.043431', 'step': 18799, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:09.074143', 'step': 18799, 'epoch': 3} {'type': 'loss', 'content': 0.045278314501047134, 'timestamp': '2025-10-01 04:36:09.098032', 'step': 18800, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:09.130690', 'step': 18800, 'epoch': 3} {'type': 'loss', 'content': 0.10071296989917755, 'timestamp': '2025-10-01 04:36:09.133134', 'step': 18801, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:36:09.164359', 'step': 18801, 'epoch': 3} {'type': 'loss', 'content': 0.05800943076610565, 'timestamp': '2025-10-01 04:36:09.166593', 'step': 18802, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:09.198045', 'step': 18802, 'epoch': 3} {'type': 'loss', 'content': 0.03322672098875046, 'timestamp': '2025-10-01 04:36:09.200760', 'step': 18803, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:09.232175', 'step': 18803, 'epoch': 3} {'type': 'loss', 'content': 0.10016505420207977, 'timestamp': '2025-10-01 04:36:09.256009', 'step': 18804, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:36:09.289267', 'step': 18804, 'epoch': 3} {'type': 'loss', 'content': 0.10709372162818909, 'timestamp': '2025-10-01 04:36:09.291522', 'step': 18805, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:36:09.322612', 'step': 18805, 'epoch': 3} {'type': 'loss', 'content': 0.04673175513744354, 'timestamp': '2025-10-01 04:36:09.330019', 'step': 18806, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:09.361424', 'step': 18806, 'epoch': 3} {'type': 'loss', 'content': 0.06679749488830566, 'timestamp': '2025-10-01 04:36:09.363851', 'step': 18807, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:36:09.410919', 'step': 18807, 'epoch': 3} {'type': 'loss', 'content': 0.10119563341140747, 'timestamp': '2025-10-01 04:36:09.435426', 'step': 18808, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:09.466871', 'step': 18808, 'epoch': 3} {'type': 'loss', 'content': 0.08278031647205353, 'timestamp': '2025-10-01 04:36:09.469258', 'step': 18809, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:09.500687', 'step': 18809, 'epoch': 3} {'type': 'loss', 'content': 0.09677832573652267, 'timestamp': '2025-10-01 04:36:09.504035', 'step': 18810, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:36:09.535287', 'step': 18810, 'epoch': 3} {'type': 'loss', 'content': 0.09213188290596008, 'timestamp': '2025-10-01 04:36:09.537630', 'step': 18811, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:36:09.569738', 'step': 18811, 'epoch': 3} {'type': 'loss', 'content': 0.08601190894842148, 'timestamp': '2025-10-01 04:36:09.593821', 'step': 18812, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:36:09.625510', 'step': 18812, 'epoch': 3} {'type': 'loss', 'content': 0.03616487234830856, 'timestamp': '2025-10-01 04:36:09.633159', 'step': 18813, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:36:09.663685', 'step': 18813, 'epoch': 3} {'type': 'loss', 'content': 0.09067222476005554, 'timestamp': '2025-10-01 04:36:09.666074', 'step': 18814, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:36:09.697030', 'step': 18814, 'epoch': 3} {'type': 'loss', 'content': 0.07981400191783905, 'timestamp': '2025-10-01 04:36:09.699919', 'step': 18815, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:09.731948', 'step': 18815, 'epoch': 3} {'type': 'loss', 'content': 0.04385530948638916, 'timestamp': '2025-10-01 04:36:09.756167', 'step': 18816, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:09.787879', 'step': 18816, 'epoch': 3} {'type': 'loss', 'content': 0.06805442273616791, 'timestamp': '2025-10-01 04:36:09.790173', 'step': 18817, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:36:09.822744', 'step': 18817, 'epoch': 3} {'type': 'loss', 'content': 0.06889338046312332, 'timestamp': '2025-10-01 04:36:09.825479', 'step': 18818, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:09.856524', 'step': 18818, 'epoch': 3} {'type': 'loss', 'content': 0.0838293731212616, 'timestamp': '2025-10-01 04:36:09.859344', 'step': 18819, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:36:09.890351', 'step': 18819, 'epoch': 3} {'type': 'loss', 'content': 0.12773795425891876, 'timestamp': '2025-10-01 04:36:09.914161', 'step': 18820, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:36:09.945043', 'step': 18820, 'epoch': 3} {'type': 'loss', 'content': 0.12597067654132843, 'timestamp': '2025-10-01 04:36:09.947557', 'step': 18821, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:36:09.983014', 'step': 18821, 'epoch': 3} {'type': 'loss', 'content': 0.025806354358792305, 'timestamp': '2025-10-01 04:36:09.985183', 'step': 18822, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:36:10.016879', 'step': 18822, 'epoch': 3} {'type': 'loss', 'content': 0.06666911393404007, 'timestamp': '2025-10-01 04:36:10.019021', 'step': 18823, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:36:10.051092', 'step': 18823, 'epoch': 3} {'type': 'loss', 'content': 0.035191610455513, 'timestamp': '2025-10-01 04:36:10.078992', 'step': 18824, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:10.112689', 'step': 18824, 'epoch': 3} {'type': 'loss', 'content': 0.08576281368732452, 'timestamp': '2025-10-01 04:36:10.115341', 'step': 18825, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:10.146753', 'step': 18825, 'epoch': 3} {'type': 'loss', 'content': 0.08474370092153549, 'timestamp': '2025-10-01 04:36:10.149197', 'step': 18826, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:36:10.181871', 'step': 18826, 'epoch': 3} {'type': 'loss', 'content': 0.053800493478775024, 'timestamp': '2025-10-01 04:36:10.184351', 'step': 18827, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:36:10.215182', 'step': 18827, 'epoch': 3} {'type': 'loss', 'content': 0.02030896209180355, 'timestamp': '2025-10-01 04:36:10.239192', 'step': 18828, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:10.271398', 'step': 18828, 'epoch': 3} {'type': 'loss', 'content': 0.15385198593139648, 'timestamp': '2025-10-01 04:36:10.273585', 'step': 18829, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:10.304803', 'step': 18829, 'epoch': 3} {'type': 'loss', 'content': 0.0716077908873558, 'timestamp': '2025-10-01 04:36:10.307286', 'step': 18830, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:10.338405', 'step': 18830, 'epoch': 3} {'type': 'loss', 'content': 0.045631784945726395, 'timestamp': '2025-10-01 04:36:10.340377', 'step': 18831, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:36:10.373158', 'step': 18831, 'epoch': 3} {'type': 'loss', 'content': 0.05839664489030838, 'timestamp': '2025-10-01 04:36:10.397561', 'step': 18832, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:10.429857', 'step': 18832, 'epoch': 3} {'type': 'loss', 'content': 0.10465316474437714, 'timestamp': '2025-10-01 04:36:10.431893', 'step': 18833, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:36:10.462935', 'step': 18833, 'epoch': 3} {'type': 'loss', 'content': 0.04850007966160774, 'timestamp': '2025-10-01 04:36:10.465147', 'step': 18834, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:10.496606', 'step': 18834, 'epoch': 3} {'type': 'loss', 'content': 0.07671895623207092, 'timestamp': '2025-10-01 04:36:10.499043', 'step': 18835, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:10.530621', 'step': 18835, 'epoch': 3} {'type': 'loss', 'content': 0.03594997525215149, 'timestamp': '2025-10-01 04:36:10.554520', 'step': 18836, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:36:10.586101', 'step': 18836, 'epoch': 3} {'type': 'loss', 'content': 0.06544096767902374, 'timestamp': '2025-10-01 04:36:10.588625', 'step': 18837, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:36:10.619001', 'step': 18837, 'epoch': 3} {'type': 'loss', 'content': 0.12279913574457169, 'timestamp': '2025-10-01 04:36:10.621513', 'step': 18838, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:10.654244', 'step': 18838, 'epoch': 3} {'type': 'loss', 'content': 0.12082085013389587, 'timestamp': '2025-10-01 04:36:10.656622', 'step': 18839, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:10.687049', 'step': 18839, 'epoch': 3} {'type': 'loss', 'content': 0.07800916582345963, 'timestamp': '2025-10-01 04:36:10.711058', 'step': 18840, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:36:10.742950', 'step': 18840, 'epoch': 3} {'type': 'loss', 'content': 0.08971750736236572, 'timestamp': '2025-10-01 04:36:10.745258', 'step': 18841, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:10.785613', 'step': 18841, 'epoch': 3} {'type': 'loss', 'content': 0.09355472028255463, 'timestamp': '2025-10-01 04:36:10.789271', 'step': 18842, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:36:10.831585', 'step': 18842, 'epoch': 3} {'type': 'loss', 'content': 0.0827450379729271, 'timestamp': '2025-10-01 04:36:10.834422', 'step': 18843, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:10.864678', 'step': 18843, 'epoch': 3} {'type': 'loss', 'content': 0.09406514465808868, 'timestamp': '2025-10-01 04:36:10.888716', 'step': 18844, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:10.920606', 'step': 18844, 'epoch': 3} {'type': 'loss', 'content': 0.043282583355903625, 'timestamp': '2025-10-01 04:36:10.923044', 'step': 18845, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:10.956969', 'step': 18845, 'epoch': 3} {'type': 'loss', 'content': 0.08666247129440308, 'timestamp': '2025-10-01 04:36:10.959317', 'step': 18846, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:10.990219', 'step': 18846, 'epoch': 3} {'type': 'loss', 'content': 0.01333761028945446, 'timestamp': '2025-10-01 04:36:10.992516', 'step': 18847, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:11.023734', 'step': 18847, 'epoch': 3} {'type': 'loss', 'content': 0.04164034500718117, 'timestamp': '2025-10-01 04:36:11.047575', 'step': 18848, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:11.077836', 'step': 18848, 'epoch': 3} {'type': 'loss', 'content': 0.08320193737745285, 'timestamp': '2025-10-01 04:36:11.080169', 'step': 18849, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:36:11.118169', 'step': 18849, 'epoch': 3} {'type': 'loss', 'content': 0.04113536328077316, 'timestamp': '2025-10-01 04:36:11.120640', 'step': 18850, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:11.151834', 'step': 18850, 'epoch': 3} {'type': 'loss', 'content': 0.10426845401525497, 'timestamp': '2025-10-01 04:36:11.154086', 'step': 18851, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:11.185145', 'step': 18851, 'epoch': 3} {'type': 'loss', 'content': 0.0634583905339241, 'timestamp': '2025-10-01 04:36:11.209356', 'step': 18852, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:36:11.240117', 'step': 18852, 'epoch': 3} {'type': 'loss', 'content': 0.07012472301721573, 'timestamp': '2025-10-01 04:36:11.242983', 'step': 18853, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:11.273425', 'step': 18853, 'epoch': 3} {'type': 'loss', 'content': 0.06748714298009872, 'timestamp': '2025-10-01 04:36:11.275670', 'step': 18854, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:11.306079', 'step': 18854, 'epoch': 3} {'type': 'loss', 'content': 0.08411705493927002, 'timestamp': '2025-10-01 04:36:11.308381', 'step': 18855, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:11.345090', 'step': 18855, 'epoch': 3} {'type': 'loss', 'content': 0.03237496688961983, 'timestamp': '2025-10-01 04:36:11.369760', 'step': 18856, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:11.401711', 'step': 18856, 'epoch': 3} {'type': 'loss', 'content': 0.10485048592090607, 'timestamp': '2025-10-01 04:36:11.403897', 'step': 18857, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:11.434554', 'step': 18857, 'epoch': 3} {'type': 'loss', 'content': 0.11737939715385437, 'timestamp': '2025-10-01 04:36:11.436758', 'step': 18858, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:36:11.467496', 'step': 18858, 'epoch': 3} {'type': 'loss', 'content': 0.06371305137872696, 'timestamp': '2025-10-01 04:36:11.470716', 'step': 18859, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:36:11.501911', 'step': 18859, 'epoch': 3} {'type': 'loss', 'content': 0.08923352509737015, 'timestamp': '2025-10-01 04:36:11.525983', 'step': 18860, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:36:11.556669', 'step': 18860, 'epoch': 3} {'type': 'loss', 'content': 0.054347824305295944, 'timestamp': '2025-10-01 04:36:11.559413', 'step': 18861, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:11.590143', 'step': 18861, 'epoch': 3} {'type': 'loss', 'content': 0.10980027914047241, 'timestamp': '2025-10-01 04:36:11.592879', 'step': 18862, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:36:11.627995', 'step': 18862, 'epoch': 3} {'type': 'loss', 'content': 0.08172868192195892, 'timestamp': '2025-10-01 04:36:11.630369', 'step': 18863, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:11.668688', 'step': 18863, 'epoch': 3} {'type': 'loss', 'content': 0.041470833122730255, 'timestamp': '2025-10-01 04:36:11.692644', 'step': 18864, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:11.743600', 'step': 18864, 'epoch': 3} {'type': 'loss', 'content': 0.09348055720329285, 'timestamp': '2025-10-01 04:36:11.745920', 'step': 18865, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:36:11.781183', 'step': 18865, 'epoch': 3} {'type': 'loss', 'content': 0.11030718684196472, 'timestamp': '2025-10-01 04:36:11.783446', 'step': 18866, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:36:11.831854', 'step': 18866, 'epoch': 3} {'type': 'loss', 'content': 0.1444394588470459, 'timestamp': '2025-10-01 04:36:11.834981', 'step': 18867, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:11.882263', 'step': 18867, 'epoch': 3} {'type': 'loss', 'content': 0.11898407340049744, 'timestamp': '2025-10-01 04:36:11.905955', 'step': 18868, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:36:11.943102', 'step': 18868, 'epoch': 3} {'type': 'loss', 'content': 0.04548121243715286, 'timestamp': '2025-10-01 04:36:11.945245', 'step': 18869, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:11.980030', 'step': 18869, 'epoch': 3} {'type': 'loss', 'content': 0.09958479553461075, 'timestamp': '2025-10-01 04:36:11.982099', 'step': 18870, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:12.030549', 'step': 18870, 'epoch': 3} {'type': 'loss', 'content': 0.1639704555273056, 'timestamp': '2025-10-01 04:36:12.032657', 'step': 18871, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:36:12.064705', 'step': 18871, 'epoch': 3} {'type': 'loss', 'content': 0.050549644976854324, 'timestamp': '2025-10-01 04:36:12.088378', 'step': 18872, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:12.133059', 'step': 18872, 'epoch': 3} {'type': 'loss', 'content': 0.12494222819805145, 'timestamp': '2025-10-01 04:36:12.142748', 'step': 18873, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:36:12.175656', 'step': 18873, 'epoch': 3} {'type': 'loss', 'content': 0.016242273151874542, 'timestamp': '2025-10-01 04:36:12.178090', 'step': 18874, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:36:12.222388', 'step': 18874, 'epoch': 3} {'type': 'loss', 'content': 0.04290655255317688, 'timestamp': '2025-10-01 04:36:12.224737', 'step': 18875, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:12.263037', 'step': 18875, 'epoch': 3} {'type': 'loss', 'content': 0.036753226071596146, 'timestamp': '2025-10-01 04:36:12.288370', 'step': 18876, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:36:12.320629', 'step': 18876, 'epoch': 3} {'type': 'loss', 'content': 0.03979416564106941, 'timestamp': '2025-10-01 04:36:12.322847', 'step': 18877, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:12.356295', 'step': 18877, 'epoch': 3} {'type': 'loss', 'content': 0.13325710594654083, 'timestamp': '2025-10-01 04:36:12.376071', 'step': 18878, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:12.414055', 'step': 18878, 'epoch': 3} {'type': 'loss', 'content': 0.07547885179519653, 'timestamp': '2025-10-01 04:36:12.416306', 'step': 18879, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:12.457888', 'step': 18879, 'epoch': 3} {'type': 'loss', 'content': 0.03320827707648277, 'timestamp': '2025-10-01 04:36:12.481726', 'step': 18880, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:12.521146', 'step': 18880, 'epoch': 3} {'type': 'loss', 'content': 0.1059689149260521, 'timestamp': '2025-10-01 04:36:12.523920', 'step': 18881, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:12.559152', 'step': 18881, 'epoch': 3} {'type': 'loss', 'content': 0.03475949540734291, 'timestamp': '2025-10-01 04:36:12.561521', 'step': 18882, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:36:12.609016', 'step': 18882, 'epoch': 3} {'type': 'loss', 'content': 0.06856195628643036, 'timestamp': '2025-10-01 04:36:12.611734', 'step': 18883, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:36:12.649369', 'step': 18883, 'epoch': 3} {'type': 'loss', 'content': 0.07998967170715332, 'timestamp': '2025-10-01 04:36:12.673429', 'step': 18884, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:12.713574', 'step': 18884, 'epoch': 3} {'type': 'loss', 'content': 0.055876608937978745, 'timestamp': '2025-10-01 04:36:12.717186', 'step': 18885, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:12.752431', 'step': 18885, 'epoch': 3} {'type': 'loss', 'content': 0.17527401447296143, 'timestamp': '2025-10-01 04:36:12.754804', 'step': 18886, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:12.791951', 'step': 18886, 'epoch': 3} {'type': 'loss', 'content': 0.050115540623664856, 'timestamp': '2025-10-01 04:36:12.808509', 'step': 18887, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:36:12.853957', 'step': 18887, 'epoch': 3} {'type': 'loss', 'content': 0.03491659462451935, 'timestamp': '2025-10-01 04:36:12.878236', 'step': 18888, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:12.934037', 'step': 18888, 'epoch': 3} {'type': 'loss', 'content': 0.1308256834745407, 'timestamp': '2025-10-01 04:36:12.936813', 'step': 18889, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:12.976950', 'step': 18889, 'epoch': 3} {'type': 'loss', 'content': 0.14939668774604797, 'timestamp': '2025-10-01 04:36:12.979341', 'step': 18890, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:36:13.028830', 'step': 18890, 'epoch': 3} {'type': 'loss', 'content': 0.07051015645265579, 'timestamp': '2025-10-01 04:36:13.031765', 'step': 18891, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:36:13.073801', 'step': 18891, 'epoch': 3} {'type': 'loss', 'content': 0.07704554498195648, 'timestamp': '2025-10-01 04:36:13.102639', 'step': 18892, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:36:13.137716', 'step': 18892, 'epoch': 3} {'type': 'loss', 'content': 0.12851934134960175, 'timestamp': '2025-10-01 04:36:13.139881', 'step': 18893, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:13.179377', 'step': 18893, 'epoch': 3} {'type': 'loss', 'content': 0.07023683935403824, 'timestamp': '2025-10-01 04:36:13.181692', 'step': 18894, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:36:13.217725', 'step': 18894, 'epoch': 3} {'type': 'loss', 'content': 0.052713543176651, 'timestamp': '2025-10-01 04:36:13.220101', 'step': 18895, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:36:13.256367', 'step': 18895, 'epoch': 3} {'type': 'loss', 'content': 0.05958018824458122, 'timestamp': '2025-10-01 04:36:13.280878', 'step': 18896, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:13.319625', 'step': 18896, 'epoch': 3} {'type': 'loss', 'content': 0.10428805649280548, 'timestamp': '2025-10-01 04:36:13.323641', 'step': 18897, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:13.360654', 'step': 18897, 'epoch': 3} {'type': 'loss', 'content': 0.0587134025990963, 'timestamp': '2025-10-01 04:36:13.363392', 'step': 18898, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:36:13.403540', 'step': 18898, 'epoch': 3} {'type': 'loss', 'content': 0.13151413202285767, 'timestamp': '2025-10-01 04:36:13.407365', 'step': 18899, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:36:13.445316', 'step': 18899, 'epoch': 3} {'type': 'loss', 'content': 0.024149050936102867, 'timestamp': '2025-10-01 04:36:13.469666', 'step': 18900, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:36:13.502423', 'step': 18900, 'epoch': 3} {'type': 'loss', 'content': 0.08347692340612411, 'timestamp': '2025-10-01 04:36:13.504818', 'step': 18901, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-10-01 04:36:13.538462', 'step': 18901, 'epoch': 3} {'type': 'loss', 'content': 0.08609305322170258, 'timestamp': '2025-10-01 04:36:13.543153', 'step': 18902, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:13.589057', 'step': 18902, 'epoch': 3} {'type': 'loss', 'content': 0.06259601563215256, 'timestamp': '2025-10-01 04:36:13.596131', 'step': 18903, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:13.635163', 'step': 18903, 'epoch': 3} {'type': 'loss', 'content': 0.06002813205122948, 'timestamp': '2025-10-01 04:36:13.661083', 'step': 18904, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:13.693454', 'step': 18904, 'epoch': 3} {'type': 'loss', 'content': 0.05249692127108574, 'timestamp': '2025-10-01 04:36:13.704373', 'step': 18905, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:36:13.746351', 'step': 18905, 'epoch': 3} {'type': 'loss', 'content': 0.029650267213582993, 'timestamp': '2025-10-01 04:36:13.749640', 'step': 18906, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:13.792108', 'step': 18906, 'epoch': 3} {'type': 'loss', 'content': 0.11560825258493423, 'timestamp': '2025-10-01 04:36:13.794601', 'step': 18907, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:13.835917', 'step': 18907, 'epoch': 3} {'type': 'loss', 'content': 0.04503980651497841, 'timestamp': '2025-10-01 04:36:13.859841', 'step': 18908, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:13.892276', 'step': 18908, 'epoch': 3} {'type': 'loss', 'content': 0.0483221709728241, 'timestamp': '2025-10-01 04:36:13.894961', 'step': 18909, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:36:13.928721', 'step': 18909, 'epoch': 3} {'type': 'loss', 'content': 0.06786934286355972, 'timestamp': '2025-10-01 04:36:13.932969', 'step': 18910, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:13.972552', 'step': 18910, 'epoch': 3} {'type': 'loss', 'content': 0.03540835902094841, 'timestamp': '2025-10-01 04:36:13.975320', 'step': 18911, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:36:14.012354', 'step': 18911, 'epoch': 3} {'type': 'loss', 'content': 0.030877219513058662, 'timestamp': '2025-10-01 04:36:14.036526', 'step': 18912, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:14.067797', 'step': 18912, 'epoch': 3} {'type': 'loss', 'content': 0.040365319699048996, 'timestamp': '2025-10-01 04:36:14.070624', 'step': 18913, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:36:14.114100', 'step': 18913, 'epoch': 3} {'type': 'loss', 'content': 0.06575878709554672, 'timestamp': '2025-10-01 04:36:14.116791', 'step': 18914, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:36:14.153006', 'step': 18914, 'epoch': 3} {'type': 'loss', 'content': 0.029668882489204407, 'timestamp': '2025-10-01 04:36:14.155773', 'step': 18915, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:14.199227', 'step': 18915, 'epoch': 3} {'type': 'loss', 'content': 0.10104983299970627, 'timestamp': '2025-10-01 04:36:14.223086', 'step': 18916, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:14.282790', 'step': 18916, 'epoch': 3} {'type': 'loss', 'content': 0.018930988386273384, 'timestamp': '2025-10-01 04:36:14.291757', 'step': 18917, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:36:14.331045', 'step': 18917, 'epoch': 3} {'type': 'loss', 'content': 0.09398485720157623, 'timestamp': '2025-10-01 04:36:14.339981', 'step': 18918, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:14.373189', 'step': 18918, 'epoch': 3} {'type': 'loss', 'content': 0.07955560833215714, 'timestamp': '2025-10-01 04:36:14.375637', 'step': 18919, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:36:14.407255', 'step': 18919, 'epoch': 3} {'type': 'loss', 'content': 0.12965619564056396, 'timestamp': '2025-10-01 04:36:14.438922', 'step': 18920, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:36:14.477076', 'step': 18920, 'epoch': 3} {'type': 'loss', 'content': 0.04507502168416977, 'timestamp': '2025-10-01 04:36:14.479135', 'step': 18921, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:14.510683', 'step': 18921, 'epoch': 3} {'type': 'loss', 'content': 0.009209630079567432, 'timestamp': '2025-10-01 04:36:14.513107', 'step': 18922, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:14.548521', 'step': 18922, 'epoch': 3} {'type': 'loss', 'content': 0.035330720245838165, 'timestamp': '2025-10-01 04:36:14.561318', 'step': 18923, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:36:14.604113', 'step': 18923, 'epoch': 3} {'type': 'loss', 'content': 0.05247574299573898, 'timestamp': '2025-10-01 04:36:14.628901', 'step': 18924, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:36:14.667616', 'step': 18924, 'epoch': 3} {'type': 'loss', 'content': 0.20941440761089325, 'timestamp': '2025-10-01 04:36:14.670907', 'step': 18925, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:36:14.707806', 'step': 18925, 'epoch': 3} {'type': 'loss', 'content': 0.10668261349201202, 'timestamp': '2025-10-01 04:36:14.710039', 'step': 18926, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:14.755947', 'step': 18926, 'epoch': 3} {'type': 'loss', 'content': 0.04427788406610489, 'timestamp': '2025-10-01 04:36:14.758142', 'step': 18927, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:14.790270', 'step': 18927, 'epoch': 3} {'type': 'loss', 'content': 0.05554436892271042, 'timestamp': '2025-10-01 04:36:14.814065', 'step': 18928, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:14.846813', 'step': 18928, 'epoch': 3} {'type': 'loss', 'content': 0.1298406422138214, 'timestamp': '2025-10-01 04:36:14.851738', 'step': 18929, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:36:14.884974', 'step': 18929, 'epoch': 3} {'type': 'loss', 'content': 0.047948699444532394, 'timestamp': '2025-10-01 04:36:14.887236', 'step': 18930, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:36:14.934273', 'step': 18930, 'epoch': 3} {'type': 'loss', 'content': 0.02274385094642639, 'timestamp': '2025-10-01 04:36:14.936602', 'step': 18931, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:14.981887', 'step': 18931, 'epoch': 3} {'type': 'loss', 'content': 0.06127076596021652, 'timestamp': '2025-10-01 04:36:15.005696', 'step': 18932, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:15.037801', 'step': 18932, 'epoch': 3} {'type': 'loss', 'content': 0.10599876940250397, 'timestamp': '2025-10-01 04:36:15.040106', 'step': 18933, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:36:15.072002', 'step': 18933, 'epoch': 3} {'type': 'loss', 'content': 0.05254321172833443, 'timestamp': '2025-10-01 04:36:15.074226', 'step': 18934, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:36:15.116971', 'step': 18934, 'epoch': 3} {'type': 'loss', 'content': 0.06456983089447021, 'timestamp': '2025-10-01 04:36:15.120149', 'step': 18935, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:36:15.161469', 'step': 18935, 'epoch': 3} {'type': 'loss', 'content': 0.05205700546503067, 'timestamp': '2025-10-01 04:36:15.185070', 'step': 18936, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:15.224071', 'step': 18936, 'epoch': 3} {'type': 'loss', 'content': 0.06856537610292435, 'timestamp': '2025-10-01 04:36:15.226283', 'step': 18937, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:36:15.260135', 'step': 18937, 'epoch': 3} {'type': 'loss', 'content': 0.09433166682720184, 'timestamp': '2025-10-01 04:36:15.262218', 'step': 18938, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:15.296083', 'step': 18938, 'epoch': 3} {'type': 'loss', 'content': 0.11883421242237091, 'timestamp': '2025-10-01 04:36:15.298459', 'step': 18939, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:15.335593', 'step': 18939, 'epoch': 3} {'type': 'loss', 'content': 0.020733429118990898, 'timestamp': '2025-10-01 04:36:15.360558', 'step': 18940, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:15.395612', 'step': 18940, 'epoch': 3} {'type': 'loss', 'content': 0.10030066221952438, 'timestamp': '2025-10-01 04:36:15.397830', 'step': 18941, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:36:15.430146', 'step': 18941, 'epoch': 3} {'type': 'loss', 'content': 0.12366694957017899, 'timestamp': '2025-10-01 04:36:15.432449', 'step': 18942, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:15.464311', 'step': 18942, 'epoch': 3} {'type': 'loss', 'content': 0.06552845239639282, 'timestamp': '2025-10-01 04:36:15.467025', 'step': 18943, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:36:15.522123', 'step': 18943, 'epoch': 3} {'type': 'loss', 'content': 0.06582890450954437, 'timestamp': '2025-10-01 04:36:15.545752', 'step': 18944, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:15.580300', 'step': 18944, 'epoch': 3} {'type': 'loss', 'content': 0.03307357057929039, 'timestamp': '2025-10-01 04:36:15.582542', 'step': 18945, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:15.618094', 'step': 18945, 'epoch': 3} {'type': 'loss', 'content': 0.09044236689805984, 'timestamp': '2025-10-01 04:36:15.629391', 'step': 18946, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:15.670876', 'step': 18946, 'epoch': 3} {'type': 'loss', 'content': 0.015952477231621742, 'timestamp': '2025-10-01 04:36:15.673605', 'step': 18947, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:15.707595', 'step': 18947, 'epoch': 3} {'type': 'loss', 'content': 0.04802491143345833, 'timestamp': '2025-10-01 04:36:15.731405', 'step': 18948, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:15.765845', 'step': 18948, 'epoch': 3} {'type': 'loss', 'content': 0.04729009047150612, 'timestamp': '2025-10-01 04:36:15.768025', 'step': 18949, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:36:15.801069', 'step': 18949, 'epoch': 3} {'type': 'loss', 'content': 0.07383379340171814, 'timestamp': '2025-10-01 04:36:15.803326', 'step': 18950, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:36:15.836627', 'step': 18950, 'epoch': 3} {'type': 'loss', 'content': 0.04904450476169586, 'timestamp': '2025-10-01 04:36:15.839134', 'step': 18951, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:36:15.875220', 'step': 18951, 'epoch': 3} {'type': 'loss', 'content': 0.08479636907577515, 'timestamp': '2025-10-01 04:36:15.900004', 'step': 18952, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:36:15.932523', 'step': 18952, 'epoch': 3} {'type': 'loss', 'content': 0.104745052754879, 'timestamp': '2025-10-01 04:36:15.934770', 'step': 18953, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:36:15.967783', 'step': 18953, 'epoch': 3} {'type': 'loss', 'content': 0.0390973836183548, 'timestamp': '2025-10-01 04:36:15.973115', 'step': 18954, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:16.005530', 'step': 18954, 'epoch': 3} {'type': 'loss', 'content': 0.06716958433389664, 'timestamp': '2025-10-01 04:36:16.007796', 'step': 18955, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:36:16.041054', 'step': 18955, 'epoch': 3} {'type': 'loss', 'content': 0.07538548856973648, 'timestamp': '2025-10-01 04:36:16.079246', 'step': 18956, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:16.133581', 'step': 18956, 'epoch': 3} {'type': 'loss', 'content': 0.04540480673313141, 'timestamp': '2025-10-01 04:36:16.137182', 'step': 18957, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:16.198562', 'step': 18957, 'epoch': 3} {'type': 'loss', 'content': 0.045997560024261475, 'timestamp': '2025-10-01 04:36:16.200589', 'step': 18958, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:16.249981', 'step': 18958, 'epoch': 3} {'type': 'loss', 'content': 0.06979349255561829, 'timestamp': '2025-10-01 04:36:16.251773', 'step': 18959, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:16.291430', 'step': 18959, 'epoch': 3} {'type': 'loss', 'content': 0.037707969546318054, 'timestamp': '2025-10-01 04:36:16.315352', 'step': 18960, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:16.354843', 'step': 18960, 'epoch': 3} {'type': 'loss', 'content': 0.09547536820173264, 'timestamp': '2025-10-01 04:36:16.356848', 'step': 18961, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:16.388334', 'step': 18961, 'epoch': 3} {'type': 'loss', 'content': 0.06363514810800552, 'timestamp': '2025-10-01 04:36:16.390182', 'step': 18962, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:16.421816', 'step': 18962, 'epoch': 3} {'type': 'loss', 'content': 0.07100256532430649, 'timestamp': '2025-10-01 04:36:16.438167', 'step': 18963, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:36:16.472429', 'step': 18963, 'epoch': 3} {'type': 'loss', 'content': 0.03429252281785011, 'timestamp': '2025-10-01 04:36:16.495752', 'step': 18964, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:36:16.530975', 'step': 18964, 'epoch': 3} {'type': 'loss', 'content': 0.04076184332370758, 'timestamp': '2025-10-01 04:36:16.532993', 'step': 18965, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:16.577718', 'step': 18965, 'epoch': 3} {'type': 'loss', 'content': 0.026829693466424942, 'timestamp': '2025-10-01 04:36:16.580574', 'step': 18966, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:16.613737', 'step': 18966, 'epoch': 3} {'type': 'loss', 'content': 0.04078928008675575, 'timestamp': '2025-10-01 04:36:16.620801', 'step': 18967, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:16.653907', 'step': 18967, 'epoch': 3} {'type': 'loss', 'content': 0.07825208455324173, 'timestamp': '2025-10-01 04:36:16.677458', 'step': 18968, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:16.722538', 'step': 18968, 'epoch': 3} {'type': 'loss', 'content': 0.031920891255140305, 'timestamp': '2025-10-01 04:36:16.725021', 'step': 18969, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:16.756601', 'step': 18969, 'epoch': 3} {'type': 'loss', 'content': 0.12475784122943878, 'timestamp': '2025-10-01 04:36:16.758909', 'step': 18970, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:16.789389', 'step': 18970, 'epoch': 3} {'type': 'loss', 'content': 0.03563539311289787, 'timestamp': '2025-10-01 04:36:16.792275', 'step': 18971, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:36:16.839122', 'step': 18971, 'epoch': 3} {'type': 'loss', 'content': 0.11042024195194244, 'timestamp': '2025-10-01 04:36:16.865130', 'step': 18972, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:16.900393', 'step': 18972, 'epoch': 3} {'type': 'loss', 'content': 0.013384507037699223, 'timestamp': '2025-10-01 04:36:16.902769', 'step': 18973, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:16.935032', 'step': 18973, 'epoch': 3} {'type': 'loss', 'content': 0.07410499453544617, 'timestamp': '2025-10-01 04:36:16.943859', 'step': 18974, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:36:16.982383', 'step': 18974, 'epoch': 3} {'type': 'loss', 'content': 0.0387626588344574, 'timestamp': '2025-10-01 04:36:16.984520', 'step': 18975, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:17.025129', 'step': 18975, 'epoch': 3} {'type': 'loss', 'content': 0.02741198055446148, 'timestamp': '2025-10-01 04:36:17.048750', 'step': 18976, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:17.081504', 'step': 18976, 'epoch': 3} {'type': 'loss', 'content': 0.08935621380805969, 'timestamp': '2025-10-01 04:36:17.084272', 'step': 18977, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:17.116041', 'step': 18977, 'epoch': 3} {'type': 'loss', 'content': 0.07784978300333023, 'timestamp': '2025-10-01 04:36:17.119257', 'step': 18978, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:36:17.155686', 'step': 18978, 'epoch': 3} {'type': 'loss', 'content': 0.07784665375947952, 'timestamp': '2025-10-01 04:36:17.158083', 'step': 18979, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:17.190133', 'step': 18979, 'epoch': 3} {'type': 'loss', 'content': 0.08512189239263535, 'timestamp': '2025-10-01 04:36:17.213618', 'step': 18980, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:17.247074', 'step': 18980, 'epoch': 3} {'type': 'loss', 'content': 0.07124699652194977, 'timestamp': '2025-10-01 04:36:17.249065', 'step': 18981, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:36:17.289010', 'step': 18981, 'epoch': 3} {'type': 'loss', 'content': 0.09039086848497391, 'timestamp': '2025-10-01 04:36:17.291114', 'step': 18982, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:17.322831', 'step': 18982, 'epoch': 3} {'type': 'loss', 'content': 0.025949109345674515, 'timestamp': '2025-10-01 04:36:17.324963', 'step': 18983, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:36:17.358660', 'step': 18983, 'epoch': 3} {'type': 'loss', 'content': 0.0734776109457016, 'timestamp': '2025-10-01 04:36:17.382256', 'step': 18984, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:17.427630', 'step': 18984, 'epoch': 3} {'type': 'loss', 'content': 0.03686167672276497, 'timestamp': '2025-10-01 04:36:17.429902', 'step': 18985, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:17.464196', 'step': 18985, 'epoch': 3} {'type': 'loss', 'content': 0.026536524295806885, 'timestamp': '2025-10-01 04:36:17.467176', 'step': 18986, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:36:17.500499', 'step': 18986, 'epoch': 3} {'type': 'loss', 'content': 0.06412293761968613, 'timestamp': '2025-10-01 04:36:17.502808', 'step': 18987, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:17.535575', 'step': 18987, 'epoch': 3} {'type': 'loss', 'content': 0.018496587872505188, 'timestamp': '2025-10-01 04:36:17.559590', 'step': 18988, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:36:17.599479', 'step': 18988, 'epoch': 3} {'type': 'loss', 'content': 0.09449592977762222, 'timestamp': '2025-10-01 04:36:17.601994', 'step': 18989, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:36:17.634970', 'step': 18989, 'epoch': 3} {'type': 'loss', 'content': 0.030855560675263405, 'timestamp': '2025-10-01 04:36:17.637785', 'step': 18990, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:17.668646', 'step': 18990, 'epoch': 3} {'type': 'loss', 'content': 0.016020426526665688, 'timestamp': '2025-10-01 04:36:17.671027', 'step': 18991, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:17.707821', 'step': 18991, 'epoch': 3} {'type': 'loss', 'content': 0.07447720319032669, 'timestamp': '2025-10-01 04:36:17.731696', 'step': 18992, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:17.763778', 'step': 18992, 'epoch': 3} {'type': 'loss', 'content': 0.03398066386580467, 'timestamp': '2025-10-01 04:36:17.766919', 'step': 18993, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:17.808749', 'step': 18993, 'epoch': 3} {'type': 'loss', 'content': 0.012306555174291134, 'timestamp': '2025-10-01 04:36:17.811094', 'step': 18994, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:36:17.842792', 'step': 18994, 'epoch': 3} {'type': 'loss', 'content': 0.03729120269417763, 'timestamp': '2025-10-01 04:36:17.845097', 'step': 18995, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:36:17.878958', 'step': 18995, 'epoch': 3} {'type': 'loss', 'content': 0.06434103846549988, 'timestamp': '2025-10-01 04:36:17.903015', 'step': 18996, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:36:17.938190', 'step': 18996, 'epoch': 3} {'type': 'loss', 'content': 0.061688728630542755, 'timestamp': '2025-10-01 04:36:17.941498', 'step': 18997, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:17.979188', 'step': 18997, 'epoch': 3} {'type': 'loss', 'content': 0.020546244457364082, 'timestamp': '2025-10-01 04:36:17.984622', 'step': 18998, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:18.016549', 'step': 18998, 'epoch': 3} {'type': 'loss', 'content': 0.019479678943753242, 'timestamp': '2025-10-01 04:36:18.020034', 'step': 18999, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:18.053463', 'step': 18999, 'epoch': 3} {'type': 'loss', 'content': 0.05222409591078758, 'timestamp': '2025-10-01 04:36:18.077171', 'step': 19000, 'epoch': 3} {'type': 'info', 'content': 'Checkpoint saved at step 19000', 'timestamp': '2025-10-01 04:36:23.072726', 'step': 19000, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:23.109866', 'step': 19000, 'epoch': 3} {'type': 'loss', 'content': 0.03729848191142082, 'timestamp': '2025-10-01 04:36:23.112037', 'step': 19001, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:23.143913', 'step': 19001, 'epoch': 3} {'type': 'loss', 'content': 0.08964017033576965, 'timestamp': '2025-10-01 04:36:23.146216', 'step': 19002, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:23.178140', 'step': 19002, 'epoch': 3} {'type': 'loss', 'content': 0.04307614639401436, 'timestamp': '2025-10-01 04:36:23.180510', 'step': 19003, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:36:23.226512', 'step': 19003, 'epoch': 3} {'type': 'loss', 'content': 0.049507055431604385, 'timestamp': '2025-10-01 04:36:23.250523', 'step': 19004, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:36:23.281751', 'step': 19004, 'epoch': 3} {'type': 'loss', 'content': 0.052222929894924164, 'timestamp': '2025-10-01 04:36:23.284010', 'step': 19005, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:23.314261', 'step': 19005, 'epoch': 3} {'type': 'loss', 'content': 0.06590954214334488, 'timestamp': '2025-10-01 04:36:23.316456', 'step': 19006, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:36:23.346186', 'step': 19006, 'epoch': 3} {'type': 'loss', 'content': 0.14041069149971008, 'timestamp': '2025-10-01 04:36:23.348522', 'step': 19007, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:36:23.389296', 'step': 19007, 'epoch': 3} {'type': 'loss', 'content': 0.05415176600217819, 'timestamp': '2025-10-01 04:36:23.413052', 'step': 19008, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:23.442688', 'step': 19008, 'epoch': 3} {'type': 'loss', 'content': 0.03537187725305557, 'timestamp': '2025-10-01 04:36:23.444868', 'step': 19009, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:23.475783', 'step': 19009, 'epoch': 3} {'type': 'loss', 'content': 0.022394530475139618, 'timestamp': '2025-10-01 04:36:23.477862', 'step': 19010, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:23.508702', 'step': 19010, 'epoch': 3} {'type': 'loss', 'content': 0.020896652713418007, 'timestamp': '2025-10-01 04:36:23.511052', 'step': 19011, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:36:23.541684', 'step': 19011, 'epoch': 3} {'type': 'loss', 'content': 0.027081307023763657, 'timestamp': '2025-10-01 04:36:23.565708', 'step': 19012, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:23.600177', 'step': 19012, 'epoch': 3} {'type': 'loss', 'content': 0.016476549208164215, 'timestamp': '2025-10-01 04:36:23.602538', 'step': 19013, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:23.632207', 'step': 19013, 'epoch': 3} {'type': 'loss', 'content': 0.0330747552216053, 'timestamp': '2025-10-01 04:36:23.634493', 'step': 19014, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:23.665800', 'step': 19014, 'epoch': 3} {'type': 'loss', 'content': 0.03131372109055519, 'timestamp': '2025-10-01 04:36:23.668053', 'step': 19015, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:23.698074', 'step': 19015, 'epoch': 3} {'type': 'loss', 'content': 0.09670086205005646, 'timestamp': '2025-10-01 04:36:23.722496', 'step': 19016, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:23.754992', 'step': 19016, 'epoch': 3} {'type': 'loss', 'content': 0.03538820520043373, 'timestamp': '2025-10-01 04:36:23.757232', 'step': 19017, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:36:23.793014', 'step': 19017, 'epoch': 3} {'type': 'loss', 'content': 0.088936947286129, 'timestamp': '2025-10-01 04:36:23.795811', 'step': 19018, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:23.849549', 'step': 19018, 'epoch': 3} {'type': 'loss', 'content': 0.11153096705675125, 'timestamp': '2025-10-01 04:36:23.851922', 'step': 19019, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:23.883043', 'step': 19019, 'epoch': 3} {'type': 'loss', 'content': 0.012245290912687778, 'timestamp': '2025-10-01 04:36:23.906942', 'step': 19020, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:23.939716', 'step': 19020, 'epoch': 3} {'type': 'loss', 'content': 0.01126615609973669, 'timestamp': '2025-10-01 04:36:23.942048', 'step': 19021, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:36:23.973477', 'step': 19021, 'epoch': 3} {'type': 'loss', 'content': 0.059871021658182144, 'timestamp': '2025-10-01 04:36:23.975881', 'step': 19022, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:24.006067', 'step': 19022, 'epoch': 3} {'type': 'loss', 'content': 0.08983050286769867, 'timestamp': '2025-10-01 04:36:24.008413', 'step': 19023, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:24.040609', 'step': 19023, 'epoch': 3} {'type': 'loss', 'content': 0.040643081068992615, 'timestamp': '2025-10-01 04:36:24.064270', 'step': 19024, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:36:24.096100', 'step': 19024, 'epoch': 3} {'type': 'loss', 'content': 0.033756159245967865, 'timestamp': '2025-10-01 04:36:24.098639', 'step': 19025, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:36:24.129460', 'step': 19025, 'epoch': 3} {'type': 'loss', 'content': 0.14049729704856873, 'timestamp': '2025-10-01 04:36:24.131652', 'step': 19026, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:24.163953', 'step': 19026, 'epoch': 3} {'type': 'loss', 'content': 0.11980512738227844, 'timestamp': '2025-10-01 04:36:24.166239', 'step': 19027, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:24.198388', 'step': 19027, 'epoch': 3} {'type': 'loss', 'content': 0.049240656197071075, 'timestamp': '2025-10-01 04:36:24.222118', 'step': 19028, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:36:24.252580', 'step': 19028, 'epoch': 3} {'type': 'loss', 'content': 0.03983423486351967, 'timestamp': '2025-10-01 04:36:24.254870', 'step': 19029, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:24.285790', 'step': 19029, 'epoch': 3} {'type': 'loss', 'content': 0.04825856164097786, 'timestamp': '2025-10-01 04:36:24.288125', 'step': 19030, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:24.320298', 'step': 19030, 'epoch': 3} {'type': 'loss', 'content': 0.1357697993516922, 'timestamp': '2025-10-01 04:36:24.322764', 'step': 19031, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:36:24.353800', 'step': 19031, 'epoch': 3} {'type': 'loss', 'content': 0.027918441221117973, 'timestamp': '2025-10-01 04:36:24.377863', 'step': 19032, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:24.409448', 'step': 19032, 'epoch': 3} {'type': 'loss', 'content': 0.03215096890926361, 'timestamp': '2025-10-01 04:36:24.412074', 'step': 19033, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:36:24.444545', 'step': 19033, 'epoch': 3} {'type': 'loss', 'content': 0.05629304423928261, 'timestamp': '2025-10-01 04:36:24.447617', 'step': 19034, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:24.479063', 'step': 19034, 'epoch': 3} {'type': 'loss', 'content': 0.05435076728463173, 'timestamp': '2025-10-01 04:36:24.481570', 'step': 19035, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:36:24.521026', 'step': 19035, 'epoch': 3} {'type': 'loss', 'content': 0.0034613285679370165, 'timestamp': '2025-10-01 04:36:24.545388', 'step': 19036, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:24.585481', 'step': 19036, 'epoch': 3} {'type': 'loss', 'content': 0.10845109075307846, 'timestamp': '2025-10-01 04:36:24.587826', 'step': 19037, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:36:24.619054', 'step': 19037, 'epoch': 3} {'type': 'loss', 'content': 0.018304316326975822, 'timestamp': '2025-10-01 04:36:24.621362', 'step': 19038, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:24.652969', 'step': 19038, 'epoch': 3} {'type': 'loss', 'content': 0.1306603103876114, 'timestamp': '2025-10-01 04:36:24.655351', 'step': 19039, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:24.686929', 'step': 19039, 'epoch': 3} {'type': 'loss', 'content': 0.009194358251988888, 'timestamp': '2025-10-01 04:36:24.710813', 'step': 19040, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:24.742343', 'step': 19040, 'epoch': 3} {'type': 'loss', 'content': 0.11427639424800873, 'timestamp': '2025-10-01 04:36:24.759774', 'step': 19041, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:36:24.799413', 'step': 19041, 'epoch': 3} {'type': 'loss', 'content': 0.034184448421001434, 'timestamp': '2025-10-01 04:36:24.801742', 'step': 19042, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:24.835781', 'step': 19042, 'epoch': 3} {'type': 'loss', 'content': 0.03181552514433861, 'timestamp': '2025-10-01 04:36:24.838083', 'step': 19043, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:24.870820', 'step': 19043, 'epoch': 3} {'type': 'loss', 'content': 0.04636421799659729, 'timestamp': '2025-10-01 04:36:24.894853', 'step': 19044, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:36:24.925790', 'step': 19044, 'epoch': 3} {'type': 'loss', 'content': 0.06830059736967087, 'timestamp': '2025-10-01 04:36:24.928062', 'step': 19045, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:24.959261', 'step': 19045, 'epoch': 3} {'type': 'loss', 'content': 0.0812150314450264, 'timestamp': '2025-10-01 04:36:24.961755', 'step': 19046, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:24.992354', 'step': 19046, 'epoch': 3} {'type': 'loss', 'content': 0.04761475324630737, 'timestamp': '2025-10-01 04:36:24.994880', 'step': 19047, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:36:25.024802', 'step': 19047, 'epoch': 3} {'type': 'loss', 'content': 0.03865063562989235, 'timestamp': '2025-10-01 04:36:25.048622', 'step': 19048, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:36:25.079702', 'step': 19048, 'epoch': 3} {'type': 'loss', 'content': 0.015283137559890747, 'timestamp': '2025-10-01 04:36:25.082029', 'step': 19049, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:25.112807', 'step': 19049, 'epoch': 3} {'type': 'loss', 'content': 0.08351630717515945, 'timestamp': '2025-10-01 04:36:25.115139', 'step': 19050, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:25.145779', 'step': 19050, 'epoch': 3} {'type': 'loss', 'content': 0.041882604360580444, 'timestamp': '2025-10-01 04:36:25.148116', 'step': 19051, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:36:25.186566', 'step': 19051, 'epoch': 3} {'type': 'loss', 'content': 0.028882639482617378, 'timestamp': '2025-10-01 04:36:25.210472', 'step': 19052, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:36:25.243534', 'step': 19052, 'epoch': 3} {'type': 'loss', 'content': 0.09049765020608902, 'timestamp': '2025-10-01 04:36:25.245787', 'step': 19053, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:36:25.276828', 'step': 19053, 'epoch': 3} {'type': 'loss', 'content': 0.08244622498750687, 'timestamp': '2025-10-01 04:36:25.279069', 'step': 19054, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:25.310522', 'step': 19054, 'epoch': 3} {'type': 'loss', 'content': 0.03287981450557709, 'timestamp': '2025-10-01 04:36:25.312844', 'step': 19055, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:25.344190', 'step': 19055, 'epoch': 3} {'type': 'loss', 'content': 0.07950855791568756, 'timestamp': '2025-10-01 04:36:25.367938', 'step': 19056, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:25.398406', 'step': 19056, 'epoch': 3} {'type': 'loss', 'content': 0.09245855361223221, 'timestamp': '2025-10-01 04:36:25.400639', 'step': 19057, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:36:25.442556', 'step': 19057, 'epoch': 3} {'type': 'loss', 'content': 0.10336513072252274, 'timestamp': '2025-10-01 04:36:25.445070', 'step': 19058, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:25.484815', 'step': 19058, 'epoch': 3} {'type': 'loss', 'content': 0.10714684426784515, 'timestamp': '2025-10-01 04:36:25.487081', 'step': 19059, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:25.526686', 'step': 19059, 'epoch': 3} {'type': 'loss', 'content': 0.04122885316610336, 'timestamp': '2025-10-01 04:36:25.551406', 'step': 19060, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:25.583597', 'step': 19060, 'epoch': 3} {'type': 'loss', 'content': 0.028719762340188026, 'timestamp': '2025-10-01 04:36:25.585998', 'step': 19061, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:25.617811', 'step': 19061, 'epoch': 3} {'type': 'loss', 'content': 0.035752635449171066, 'timestamp': '2025-10-01 04:36:25.620099', 'step': 19062, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:25.650342', 'step': 19062, 'epoch': 3} {'type': 'loss', 'content': 0.07173691689968109, 'timestamp': '2025-10-01 04:36:25.652751', 'step': 19063, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:25.682800', 'step': 19063, 'epoch': 3} {'type': 'loss', 'content': 0.034348633140325546, 'timestamp': '2025-10-01 04:36:25.709241', 'step': 19064, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:25.739482', 'step': 19064, 'epoch': 3} {'type': 'loss', 'content': 0.03773451969027519, 'timestamp': '2025-10-01 04:36:25.741986', 'step': 19065, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:25.771665', 'step': 19065, 'epoch': 3} {'type': 'loss', 'content': 0.06521829217672348, 'timestamp': '2025-10-01 04:36:25.773915', 'step': 19066, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:36:25.804203', 'step': 19066, 'epoch': 3} {'type': 'loss', 'content': 0.02935129962861538, 'timestamp': '2025-10-01 04:36:25.814653', 'step': 19067, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:25.850691', 'step': 19067, 'epoch': 3} {'type': 'loss', 'content': 0.047877758741378784, 'timestamp': '2025-10-01 04:36:25.874457', 'step': 19068, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:36:25.904568', 'step': 19068, 'epoch': 3} {'type': 'loss', 'content': 0.057620588690042496, 'timestamp': '2025-10-01 04:36:25.906884', 'step': 19069, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:36:25.948699', 'step': 19069, 'epoch': 3} {'type': 'loss', 'content': 0.02660498395562172, 'timestamp': '2025-10-01 04:36:25.951069', 'step': 19070, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:25.981781', 'step': 19070, 'epoch': 3} {'type': 'loss', 'content': 0.07751476019620895, 'timestamp': '2025-10-01 04:36:25.984146', 'step': 19071, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:26.013901', 'step': 19071, 'epoch': 3} {'type': 'loss', 'content': 0.03900393098592758, 'timestamp': '2025-10-01 04:36:26.037643', 'step': 19072, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:36:26.069226', 'step': 19072, 'epoch': 3} {'type': 'loss', 'content': 0.07966228574514389, 'timestamp': '2025-10-01 04:36:26.071482', 'step': 19073, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:36:26.101931', 'step': 19073, 'epoch': 3} {'type': 'loss', 'content': 0.04085538536310196, 'timestamp': '2025-10-01 04:36:26.105367', 'step': 19074, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:26.136464', 'step': 19074, 'epoch': 3} {'type': 'loss', 'content': 0.009205990470945835, 'timestamp': '2025-10-01 04:36:26.139013', 'step': 19075, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:26.169150', 'step': 19075, 'epoch': 3} {'type': 'loss', 'content': 0.022100355476140976, 'timestamp': '2025-10-01 04:36:26.193372', 'step': 19076, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:36:26.224216', 'step': 19076, 'epoch': 3} {'type': 'loss', 'content': 0.1383269727230072, 'timestamp': '2025-10-01 04:36:26.226389', 'step': 19077, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:36:26.264628', 'step': 19077, 'epoch': 3} {'type': 'loss', 'content': 0.06828448176383972, 'timestamp': '2025-10-01 04:36:26.267277', 'step': 19078, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:26.303159', 'step': 19078, 'epoch': 3} {'type': 'loss', 'content': 0.0843135342001915, 'timestamp': '2025-10-01 04:36:26.305550', 'step': 19079, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:26.336694', 'step': 19079, 'epoch': 3} {'type': 'loss', 'content': 0.07860682159662247, 'timestamp': '2025-10-01 04:36:26.360642', 'step': 19080, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:26.391353', 'step': 19080, 'epoch': 3} {'type': 'loss', 'content': 0.030660394579172134, 'timestamp': '2025-10-01 04:36:26.393520', 'step': 19081, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:36:26.424390', 'step': 19081, 'epoch': 3} {'type': 'loss', 'content': 0.12087346613407135, 'timestamp': '2025-10-01 04:36:26.426728', 'step': 19082, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:26.457538', 'step': 19082, 'epoch': 3} {'type': 'loss', 'content': 0.07327628880739212, 'timestamp': '2025-10-01 04:36:26.460425', 'step': 19083, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:36:26.502196', 'step': 19083, 'epoch': 3} {'type': 'loss', 'content': 0.02136549912393093, 'timestamp': '2025-10-01 04:36:26.527003', 'step': 19084, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:26.557713', 'step': 19084, 'epoch': 3} {'type': 'loss', 'content': 0.06822644174098969, 'timestamp': '2025-10-01 04:36:26.560170', 'step': 19085, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:26.590898', 'step': 19085, 'epoch': 3} {'type': 'loss', 'content': 0.048024293035268784, 'timestamp': '2025-10-01 04:36:26.606368', 'step': 19086, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:26.636953', 'step': 19086, 'epoch': 3} {'type': 'loss', 'content': 0.10281479358673096, 'timestamp': '2025-10-01 04:36:26.639124', 'step': 19087, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:26.671569', 'step': 19087, 'epoch': 3} {'type': 'loss', 'content': 0.043327346444129944, 'timestamp': '2025-10-01 04:36:26.695224', 'step': 19088, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:36:26.734621', 'step': 19088, 'epoch': 3} {'type': 'loss', 'content': 0.027111949399113655, 'timestamp': '2025-10-01 04:36:26.737210', 'step': 19089, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:26.778579', 'step': 19089, 'epoch': 3} {'type': 'loss', 'content': 0.05667247250676155, 'timestamp': '2025-10-01 04:36:26.780935', 'step': 19090, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:26.811211', 'step': 19090, 'epoch': 3} {'type': 'loss', 'content': 0.0025363615714013577, 'timestamp': '2025-10-01 04:36:26.813560', 'step': 19091, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:36:26.844357', 'step': 19091, 'epoch': 3} {'type': 'loss', 'content': 0.023664025589823723, 'timestamp': '2025-10-01 04:36:26.871312', 'step': 19092, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:26.901982', 'step': 19092, 'epoch': 3} {'type': 'loss', 'content': 0.06239032745361328, 'timestamp': '2025-10-01 04:36:26.905244', 'step': 19093, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:26.935535', 'step': 19093, 'epoch': 3} {'type': 'loss', 'content': 0.01990675926208496, 'timestamp': '2025-10-01 04:36:26.937914', 'step': 19094, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:26.969202', 'step': 19094, 'epoch': 3} {'type': 'loss', 'content': 0.035081472247838974, 'timestamp': '2025-10-01 04:36:26.971375', 'step': 19095, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:36:27.001629', 'step': 19095, 'epoch': 3} {'type': 'loss', 'content': 0.0643518790602684, 'timestamp': '2025-10-01 04:36:27.025254', 'step': 19096, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:27.056549', 'step': 19096, 'epoch': 3} {'type': 'loss', 'content': 0.03517085313796997, 'timestamp': '2025-10-01 04:36:27.058871', 'step': 19097, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:27.090516', 'step': 19097, 'epoch': 3} {'type': 'loss', 'content': 0.1144971176981926, 'timestamp': '2025-10-01 04:36:27.092765', 'step': 19098, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:36:27.135343', 'step': 19098, 'epoch': 3} {'type': 'loss', 'content': 0.06446152180433273, 'timestamp': '2025-10-01 04:36:27.137520', 'step': 19099, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:36:27.169675', 'step': 19099, 'epoch': 3} {'type': 'loss', 'content': 0.059156592935323715, 'timestamp': '2025-10-01 04:36:27.205252', 'step': 19100, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:27.236680', 'step': 19100, 'epoch': 3} {'type': 'loss', 'content': 0.06896119564771652, 'timestamp': '2025-10-01 04:36:27.238822', 'step': 19101, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:27.268453', 'step': 19101, 'epoch': 3} {'type': 'loss', 'content': 0.04667520895600319, 'timestamp': '2025-10-01 04:36:27.270673', 'step': 19102, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:27.300909', 'step': 19102, 'epoch': 3} {'type': 'loss', 'content': 0.05781719461083412, 'timestamp': '2025-10-01 04:36:27.303204', 'step': 19103, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:36:27.334290', 'step': 19103, 'epoch': 3} {'type': 'loss', 'content': 0.048524871468544006, 'timestamp': '2025-10-01 04:36:27.358218', 'step': 19104, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:27.403904', 'step': 19104, 'epoch': 3} {'type': 'loss', 'content': 0.07062703371047974, 'timestamp': '2025-10-01 04:36:27.406281', 'step': 19105, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:27.437448', 'step': 19105, 'epoch': 3} {'type': 'loss', 'content': 0.0462343730032444, 'timestamp': '2025-10-01 04:36:27.440326', 'step': 19106, 'epoch': 3} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 949202279808}], 'timestamp': '2025-10-01 04:36:37.519941', 'step': 19106, 'epoch': 3} {'type': 'pplx', 'content': 9529.82787753749, 'timestamp': '2025-10-01 04:36:37.522963', 'step': 19106, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:37.554006', 'step': 19106, 'epoch': 3} {'type': 'loss', 'content': 0.01756232976913452, 'timestamp': '2025-10-01 04:36:37.556331', 'step': 19107, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:37.609726', 'step': 19107, 'epoch': 3} {'type': 'loss', 'content': 0.06320653110742569, 'timestamp': '2025-10-01 04:36:37.633596', 'step': 19108, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:36:37.667075', 'step': 19108, 'epoch': 3} {'type': 'loss', 'content': 0.01185118593275547, 'timestamp': '2025-10-01 04:36:37.669325', 'step': 19109, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:36:37.702195', 'step': 19109, 'epoch': 3} {'type': 'loss', 'content': 0.10706289112567902, 'timestamp': '2025-10-01 04:36:37.723571', 'step': 19110, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:37.768601', 'step': 19110, 'epoch': 3} {'type': 'loss', 'content': 0.05276419594883919, 'timestamp': '2025-10-01 04:36:37.772721', 'step': 19111, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:37.825592', 'step': 19111, 'epoch': 3} {'type': 'loss', 'content': 0.09590541571378708, 'timestamp': '2025-10-01 04:36:37.850205', 'step': 19112, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:37.881482', 'step': 19112, 'epoch': 3} {'type': 'loss', 'content': 0.06600885093212128, 'timestamp': '2025-10-01 04:36:37.884719', 'step': 19113, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:36:37.917146', 'step': 19113, 'epoch': 3} {'type': 'loss', 'content': 0.1487048715353012, 'timestamp': '2025-10-01 04:36:37.919560', 'step': 19114, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:36:37.959209', 'step': 19114, 'epoch': 3} {'type': 'loss', 'content': 0.032979100942611694, 'timestamp': '2025-10-01 04:36:37.961634', 'step': 19115, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:36:37.992703', 'step': 19115, 'epoch': 3} {'type': 'loss', 'content': 0.06395996361970901, 'timestamp': '2025-10-01 04:36:38.016738', 'step': 19116, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:38.048541', 'step': 19116, 'epoch': 3} {'type': 'loss', 'content': 0.055528514087200165, 'timestamp': '2025-10-01 04:36:38.051199', 'step': 19117, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:38.082576', 'step': 19117, 'epoch': 3} {'type': 'loss', 'content': 0.06933635473251343, 'timestamp': '2025-10-01 04:36:38.084920', 'step': 19118, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:38.118261', 'step': 19118, 'epoch': 3} {'type': 'loss', 'content': 0.041357241570949554, 'timestamp': '2025-10-01 04:36:38.120503', 'step': 19119, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:38.152041', 'step': 19119, 'epoch': 3} {'type': 'loss', 'content': 0.038855165243148804, 'timestamp': '2025-10-01 04:36:38.175906', 'step': 19120, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:38.207181', 'step': 19120, 'epoch': 3} {'type': 'loss', 'content': 0.08227819949388504, 'timestamp': '2025-10-01 04:36:38.210502', 'step': 19121, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:38.241472', 'step': 19121, 'epoch': 3} {'type': 'loss', 'content': 0.08959698677062988, 'timestamp': '2025-10-01 04:36:38.243874', 'step': 19122, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:38.275700', 'step': 19122, 'epoch': 3} {'type': 'loss', 'content': 0.03230788931250572, 'timestamp': '2025-10-01 04:36:38.289421', 'step': 19123, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:36:38.323878', 'step': 19123, 'epoch': 3} {'type': 'loss', 'content': 0.10146434605121613, 'timestamp': '2025-10-01 04:36:38.348567', 'step': 19124, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:38.383798', 'step': 19124, 'epoch': 3} {'type': 'loss', 'content': 0.0395793542265892, 'timestamp': '2025-10-01 04:36:38.399537', 'step': 19125, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:38.434683', 'step': 19125, 'epoch': 3} {'type': 'loss', 'content': 0.0644923746585846, 'timestamp': '2025-10-01 04:36:38.439475', 'step': 19126, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:38.472997', 'step': 19126, 'epoch': 3} {'type': 'loss', 'content': 0.025871602818369865, 'timestamp': '2025-10-01 04:36:38.475334', 'step': 19127, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:38.506121', 'step': 19127, 'epoch': 3} {'type': 'loss', 'content': 0.031096020713448524, 'timestamp': '2025-10-01 04:36:38.531448', 'step': 19128, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:38.579105', 'step': 19128, 'epoch': 3} {'type': 'loss', 'content': 0.07292553782463074, 'timestamp': '2025-10-01 04:36:38.581412', 'step': 19129, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:38.616449', 'step': 19129, 'epoch': 3} {'type': 'loss', 'content': 0.0810566395521164, 'timestamp': '2025-10-01 04:36:38.618717', 'step': 19130, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:38.650123', 'step': 19130, 'epoch': 3} {'type': 'loss', 'content': 0.06822384893894196, 'timestamp': '2025-10-01 04:36:38.652481', 'step': 19131, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:38.684672', 'step': 19131, 'epoch': 3} {'type': 'loss', 'content': 0.16503693163394928, 'timestamp': '2025-10-01 04:36:38.708777', 'step': 19132, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:38.750565', 'step': 19132, 'epoch': 3} {'type': 'loss', 'content': 0.03299575671553612, 'timestamp': '2025-10-01 04:36:38.759159', 'step': 19133, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:38.798559', 'step': 19133, 'epoch': 3} {'type': 'loss', 'content': 0.038380153477191925, 'timestamp': '2025-10-01 04:36:38.800856', 'step': 19134, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:38.835728', 'step': 19134, 'epoch': 3} {'type': 'loss', 'content': 0.07296084612607956, 'timestamp': '2025-10-01 04:36:38.846423', 'step': 19135, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:38.881110', 'step': 19135, 'epoch': 3} {'type': 'loss', 'content': 0.05118953436613083, 'timestamp': '2025-10-01 04:36:38.905443', 'step': 19136, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:38.957606', 'step': 19136, 'epoch': 3} {'type': 'loss', 'content': 0.09286037087440491, 'timestamp': '2025-10-01 04:36:38.960086', 'step': 19137, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:38.999555', 'step': 19137, 'epoch': 3} {'type': 'loss', 'content': 0.031126683577895164, 'timestamp': '2025-10-01 04:36:39.011456', 'step': 19138, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:39.054526', 'step': 19138, 'epoch': 3} {'type': 'loss', 'content': 0.07985862344503403, 'timestamp': '2025-10-01 04:36:39.056871', 'step': 19139, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:36:39.090092', 'step': 19139, 'epoch': 3} {'type': 'loss', 'content': 0.07742924988269806, 'timestamp': '2025-10-01 04:36:39.113952', 'step': 19140, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:39.149681', 'step': 19140, 'epoch': 3} {'type': 'loss', 'content': 0.07181088626384735, 'timestamp': '2025-10-01 04:36:39.165199', 'step': 19141, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:39.198541', 'step': 19141, 'epoch': 3} {'type': 'loss', 'content': 0.05450735241174698, 'timestamp': '2025-10-01 04:36:39.201311', 'step': 19142, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:39.246676', 'step': 19142, 'epoch': 3} {'type': 'loss', 'content': 0.0863182470202446, 'timestamp': '2025-10-01 04:36:39.250601', 'step': 19143, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:39.283322', 'step': 19143, 'epoch': 3} {'type': 'loss', 'content': 0.07019554078578949, 'timestamp': '2025-10-01 04:36:39.307352', 'step': 19144, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:39.358984', 'step': 19144, 'epoch': 3} {'type': 'loss', 'content': 0.05479469150304794, 'timestamp': '2025-10-01 04:36:39.361848', 'step': 19145, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:36:39.395002', 'step': 19145, 'epoch': 3} {'type': 'loss', 'content': 0.0364820659160614, 'timestamp': '2025-10-01 04:36:39.401059', 'step': 19146, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:39.434582', 'step': 19146, 'epoch': 3} {'type': 'loss', 'content': 0.10246660560369492, 'timestamp': '2025-10-01 04:36:39.437045', 'step': 19147, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:39.474980', 'step': 19147, 'epoch': 3} {'type': 'loss', 'content': 0.09474633634090424, 'timestamp': '2025-10-01 04:36:39.498987', 'step': 19148, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:36:39.540304', 'step': 19148, 'epoch': 3} {'type': 'loss', 'content': 0.04843462258577347, 'timestamp': '2025-10-01 04:36:39.542622', 'step': 19149, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:36:39.581097', 'step': 19149, 'epoch': 3} {'type': 'loss', 'content': 0.035531532019376755, 'timestamp': '2025-10-01 04:36:39.583763', 'step': 19150, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:36:39.619279', 'step': 19150, 'epoch': 3} {'type': 'loss', 'content': 0.01767653040587902, 'timestamp': '2025-10-01 04:36:39.622149', 'step': 19151, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:39.664625', 'step': 19151, 'epoch': 3} {'type': 'loss', 'content': 0.07702529430389404, 'timestamp': '2025-10-01 04:36:39.688620', 'step': 19152, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:39.726401', 'step': 19152, 'epoch': 3} {'type': 'loss', 'content': 0.042064543813467026, 'timestamp': '2025-10-01 04:36:39.728958', 'step': 19153, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:39.777484', 'step': 19153, 'epoch': 3} {'type': 'loss', 'content': 0.09714993089437485, 'timestamp': '2025-10-01 04:36:39.783584', 'step': 19154, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:36:39.824787', 'step': 19154, 'epoch': 3} {'type': 'loss', 'content': 0.10646922886371613, 'timestamp': '2025-10-01 04:36:39.829409', 'step': 19155, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:39.875382', 'step': 19155, 'epoch': 3} {'type': 'loss', 'content': 0.12516359984874725, 'timestamp': '2025-10-01 04:36:39.899550', 'step': 19156, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:39.948437', 'step': 19156, 'epoch': 3} {'type': 'loss', 'content': 0.0641595721244812, 'timestamp': '2025-10-01 04:36:39.951210', 'step': 19157, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:39.983915', 'step': 19157, 'epoch': 3} {'type': 'loss', 'content': 0.03772207722067833, 'timestamp': '2025-10-01 04:36:39.986735', 'step': 19158, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:40.029098', 'step': 19158, 'epoch': 3} {'type': 'loss', 'content': 0.03690085932612419, 'timestamp': '2025-10-01 04:36:40.031970', 'step': 19159, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:40.073682', 'step': 19159, 'epoch': 3} {'type': 'loss', 'content': 0.027057167142629623, 'timestamp': '2025-10-01 04:36:40.098848', 'step': 19160, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:36:40.130271', 'step': 19160, 'epoch': 3} {'type': 'loss', 'content': 0.020688103511929512, 'timestamp': '2025-10-01 04:36:40.133631', 'step': 19161, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:40.177534', 'step': 19161, 'epoch': 3} {'type': 'loss', 'content': 0.019073279574513435, 'timestamp': '2025-10-01 04:36:40.180634', 'step': 19162, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:36:40.213999', 'step': 19162, 'epoch': 3} {'type': 'loss', 'content': 0.10206693410873413, 'timestamp': '2025-10-01 04:36:40.216354', 'step': 19163, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:40.253225', 'step': 19163, 'epoch': 3} {'type': 'loss', 'content': 0.08357053250074387, 'timestamp': '2025-10-01 04:36:40.278088', 'step': 19164, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:40.309684', 'step': 19164, 'epoch': 3} {'type': 'loss', 'content': 0.03728022053837776, 'timestamp': '2025-10-01 04:36:40.312928', 'step': 19165, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:40.351449', 'step': 19165, 'epoch': 3} {'type': 'loss', 'content': 0.08236979693174362, 'timestamp': '2025-10-01 04:36:40.354230', 'step': 19166, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:40.398221', 'step': 19166, 'epoch': 3} {'type': 'loss', 'content': 0.1277993619441986, 'timestamp': '2025-10-01 04:36:40.401126', 'step': 19167, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:36:40.448404', 'step': 19167, 'epoch': 3} {'type': 'loss', 'content': 0.0919988676905632, 'timestamp': '2025-10-01 04:36:40.472231', 'step': 19168, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:40.510011', 'step': 19168, 'epoch': 3} {'type': 'loss', 'content': 0.09200093150138855, 'timestamp': '2025-10-01 04:36:40.512452', 'step': 19169, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:36:40.549926', 'step': 19169, 'epoch': 3} {'type': 'loss', 'content': 0.12253072112798691, 'timestamp': '2025-10-01 04:36:40.552407', 'step': 19170, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:40.597792', 'step': 19170, 'epoch': 3} {'type': 'loss', 'content': 0.05828525498509407, 'timestamp': '2025-10-01 04:36:40.601748', 'step': 19171, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:40.649757', 'step': 19171, 'epoch': 3} {'type': 'loss', 'content': 0.05967647582292557, 'timestamp': '2025-10-01 04:36:40.677378', 'step': 19172, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:36:40.717743', 'step': 19172, 'epoch': 3} {'type': 'loss', 'content': 0.047120075672864914, 'timestamp': '2025-10-01 04:36:40.720314', 'step': 19173, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:40.752783', 'step': 19173, 'epoch': 3} {'type': 'loss', 'content': 0.12063968926668167, 'timestamp': '2025-10-01 04:36:40.754641', 'step': 19174, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:36:40.785586', 'step': 19174, 'epoch': 3} {'type': 'loss', 'content': 0.08169980347156525, 'timestamp': '2025-10-01 04:36:40.792926', 'step': 19175, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:36:40.827161', 'step': 19175, 'epoch': 3} {'type': 'loss', 'content': 0.023274041712284088, 'timestamp': '2025-10-01 04:36:40.852534', 'step': 19176, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:36:40.883792', 'step': 19176, 'epoch': 3} {'type': 'loss', 'content': 0.06908930093050003, 'timestamp': '2025-10-01 04:36:40.887153', 'step': 19177, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:40.919721', 'step': 19177, 'epoch': 3} {'type': 'loss', 'content': 0.15633507072925568, 'timestamp': '2025-10-01 04:36:40.921907', 'step': 19178, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:36:40.953365', 'step': 19178, 'epoch': 3} {'type': 'loss', 'content': 0.06765597313642502, 'timestamp': '2025-10-01 04:36:40.955989', 'step': 19179, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:36:41.000058', 'step': 19179, 'epoch': 3} {'type': 'loss', 'content': 0.08874194324016571, 'timestamp': '2025-10-01 04:36:41.023820', 'step': 19180, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:36:41.054407', 'step': 19180, 'epoch': 3} {'type': 'loss', 'content': 0.10601542890071869, 'timestamp': '2025-10-01 04:36:41.059416', 'step': 19181, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:41.091532', 'step': 19181, 'epoch': 3} {'type': 'loss', 'content': 0.043955832719802856, 'timestamp': '2025-10-01 04:36:41.101025', 'step': 19182, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:41.137833', 'step': 19182, 'epoch': 3} {'type': 'loss', 'content': 0.07908542454242706, 'timestamp': '2025-10-01 04:36:41.141524', 'step': 19183, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:41.172044', 'step': 19183, 'epoch': 3} {'type': 'loss', 'content': 0.021478651091456413, 'timestamp': '2025-10-01 04:36:41.195690', 'step': 19184, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:41.227057', 'step': 19184, 'epoch': 3} {'type': 'loss', 'content': 0.030654650181531906, 'timestamp': '2025-10-01 04:36:41.229525', 'step': 19185, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:41.261125', 'step': 19185, 'epoch': 3} {'type': 'loss', 'content': 0.13515016436576843, 'timestamp': '2025-10-01 04:36:41.263593', 'step': 19186, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:36:41.294843', 'step': 19186, 'epoch': 3} {'type': 'loss', 'content': 0.07294674962759018, 'timestamp': '2025-10-01 04:36:41.297668', 'step': 19187, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:36:41.328119', 'step': 19187, 'epoch': 3} {'type': 'loss', 'content': 0.08219446986913681, 'timestamp': '2025-10-01 04:36:41.352002', 'step': 19188, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:36:41.395536', 'step': 19188, 'epoch': 3} {'type': 'loss', 'content': 0.11610422283411026, 'timestamp': '2025-10-01 04:36:41.398463', 'step': 19189, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:41.429343', 'step': 19189, 'epoch': 3} {'type': 'loss', 'content': 0.03448794409632683, 'timestamp': '2025-10-01 04:36:41.431555', 'step': 19190, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:41.461473', 'step': 19190, 'epoch': 3} {'type': 'loss', 'content': 0.04578837379813194, 'timestamp': '2025-10-01 04:36:41.463655', 'step': 19191, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:41.506792', 'step': 19191, 'epoch': 3} {'type': 'loss', 'content': 0.11084096133708954, 'timestamp': '2025-10-01 04:36:41.530458', 'step': 19192, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:41.561348', 'step': 19192, 'epoch': 3} {'type': 'loss', 'content': 0.09945505112409592, 'timestamp': '2025-10-01 04:36:41.563408', 'step': 19193, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:41.594911', 'step': 19193, 'epoch': 3} {'type': 'loss', 'content': 0.09032706916332245, 'timestamp': '2025-10-01 04:36:41.597228', 'step': 19194, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:36:41.628115', 'step': 19194, 'epoch': 3} {'type': 'loss', 'content': 0.05600578710436821, 'timestamp': '2025-10-01 04:36:41.630874', 'step': 19195, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:41.661034', 'step': 19195, 'epoch': 3} {'type': 'loss', 'content': 0.028441140428185463, 'timestamp': '2025-10-01 04:36:41.684821', 'step': 19196, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:41.715107', 'step': 19196, 'epoch': 3} {'type': 'loss', 'content': 0.07381390780210495, 'timestamp': '2025-10-01 04:36:41.717259', 'step': 19197, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:36:41.747678', 'step': 19197, 'epoch': 3} {'type': 'loss', 'content': 0.01823064684867859, 'timestamp': '2025-10-01 04:36:41.750049', 'step': 19198, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:36:41.780238', 'step': 19198, 'epoch': 3} {'type': 'loss', 'content': 0.06359590590000153, 'timestamp': '2025-10-01 04:36:41.782730', 'step': 19199, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:41.813929', 'step': 19199, 'epoch': 3} {'type': 'loss', 'content': 0.1438809037208557, 'timestamp': '2025-10-01 04:36:41.837605', 'step': 19200, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:41.867885', 'step': 19200, 'epoch': 3} {'type': 'loss', 'content': 0.0740240290760994, 'timestamp': '2025-10-01 04:36:41.872292', 'step': 19201, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:41.904482', 'step': 19201, 'epoch': 3} {'type': 'loss', 'content': 0.03841584548354149, 'timestamp': '2025-10-01 04:36:41.906726', 'step': 19202, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:36:41.937707', 'step': 19202, 'epoch': 3} {'type': 'loss', 'content': 0.080138199031353, 'timestamp': '2025-10-01 04:36:41.940083', 'step': 19203, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:36:41.971073', 'step': 19203, 'epoch': 3} {'type': 'loss', 'content': 0.06480656564235687, 'timestamp': '2025-10-01 04:36:41.994799', 'step': 19204, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:42.027368', 'step': 19204, 'epoch': 3} {'type': 'loss', 'content': 0.06628865003585815, 'timestamp': '2025-10-01 04:36:42.029874', 'step': 19205, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:42.061306', 'step': 19205, 'epoch': 3} {'type': 'loss', 'content': 0.08388172090053558, 'timestamp': '2025-10-01 04:36:42.063335', 'step': 19206, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:36:42.094607', 'step': 19206, 'epoch': 3} {'type': 'loss', 'content': 0.09037569910287857, 'timestamp': '2025-10-01 04:36:42.102112', 'step': 19207, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:42.134168', 'step': 19207, 'epoch': 3} {'type': 'loss', 'content': 0.07166197896003723, 'timestamp': '2025-10-01 04:36:42.157955', 'step': 19208, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:42.190518', 'step': 19208, 'epoch': 3} {'type': 'loss', 'content': 0.06690996885299683, 'timestamp': '2025-10-01 04:36:42.192757', 'step': 19209, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:42.225467', 'step': 19209, 'epoch': 3} {'type': 'loss', 'content': 0.2281961441040039, 'timestamp': '2025-10-01 04:36:42.227665', 'step': 19210, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:42.259697', 'step': 19210, 'epoch': 3} {'type': 'loss', 'content': 0.06663766503334045, 'timestamp': '2025-10-01 04:36:42.262178', 'step': 19211, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:42.293710', 'step': 19211, 'epoch': 3} {'type': 'loss', 'content': 0.04823089763522148, 'timestamp': '2025-10-01 04:36:42.317349', 'step': 19212, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:36:42.347945', 'step': 19212, 'epoch': 3} {'type': 'loss', 'content': 0.1704472303390503, 'timestamp': '2025-10-01 04:36:42.350081', 'step': 19213, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:36:42.385691', 'step': 19213, 'epoch': 3} {'type': 'loss', 'content': 0.027178136631846428, 'timestamp': '2025-10-01 04:36:42.387887', 'step': 19214, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:36:42.418699', 'step': 19214, 'epoch': 3} {'type': 'loss', 'content': 0.05895643308758736, 'timestamp': '2025-10-01 04:36:42.421433', 'step': 19215, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:36:42.453648', 'step': 19215, 'epoch': 3} {'type': 'loss', 'content': 0.09469319880008698, 'timestamp': '2025-10-01 04:36:42.493919', 'step': 19216, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:42.526191', 'step': 19216, 'epoch': 3} {'type': 'loss', 'content': 0.08787509053945541, 'timestamp': '2025-10-01 04:36:42.532124', 'step': 19217, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:36:42.562683', 'step': 19217, 'epoch': 3} {'type': 'loss', 'content': 0.039417825639247894, 'timestamp': '2025-10-01 04:36:42.565119', 'step': 19218, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:42.595734', 'step': 19218, 'epoch': 3} {'type': 'loss', 'content': 0.10982391983270645, 'timestamp': '2025-10-01 04:36:42.597960', 'step': 19219, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:42.633681', 'step': 19219, 'epoch': 3} {'type': 'loss', 'content': 0.12439769506454468, 'timestamp': '2025-10-01 04:36:42.669143', 'step': 19220, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:36:42.700214', 'step': 19220, 'epoch': 3} {'type': 'loss', 'content': 0.09710272401571274, 'timestamp': '2025-10-01 04:36:42.704477', 'step': 19221, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:36:42.745475', 'step': 19221, 'epoch': 3} {'type': 'loss', 'content': 0.046176936477422714, 'timestamp': '2025-10-01 04:36:42.747718', 'step': 19222, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:42.778871', 'step': 19222, 'epoch': 3} {'type': 'loss', 'content': 0.08238323032855988, 'timestamp': '2025-10-01 04:36:42.781278', 'step': 19223, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:42.812706', 'step': 19223, 'epoch': 3} {'type': 'loss', 'content': 0.047629889100790024, 'timestamp': '2025-10-01 04:36:42.836426', 'step': 19224, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:42.872077', 'step': 19224, 'epoch': 3} {'type': 'loss', 'content': 0.03539205715060234, 'timestamp': '2025-10-01 04:36:42.874216', 'step': 19225, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:36:42.905675', 'step': 19225, 'epoch': 3} {'type': 'loss', 'content': 0.029773954302072525, 'timestamp': '2025-10-01 04:36:42.909098', 'step': 19226, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:36:42.943444', 'step': 19226, 'epoch': 3} {'type': 'loss', 'content': 0.04481285437941551, 'timestamp': '2025-10-01 04:36:42.946444', 'step': 19227, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:36:42.976837', 'step': 19227, 'epoch': 3} {'type': 'loss', 'content': 0.1559213548898697, 'timestamp': '2025-10-01 04:36:43.000756', 'step': 19228, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:43.032382', 'step': 19228, 'epoch': 3} {'type': 'loss', 'content': 0.10370510816574097, 'timestamp': '2025-10-01 04:36:43.034896', 'step': 19229, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:43.066203', 'step': 19229, 'epoch': 3} {'type': 'loss', 'content': 0.12897104024887085, 'timestamp': '2025-10-01 04:36:43.068407', 'step': 19230, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:43.100042', 'step': 19230, 'epoch': 3} {'type': 'loss', 'content': 0.007665353827178478, 'timestamp': '2025-10-01 04:36:43.102473', 'step': 19231, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:43.135509', 'step': 19231, 'epoch': 3} {'type': 'loss', 'content': 0.07921893894672394, 'timestamp': '2025-10-01 04:36:43.159103', 'step': 19232, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:43.190749', 'step': 19232, 'epoch': 3} {'type': 'loss', 'content': 0.05693382769823074, 'timestamp': '2025-10-01 04:36:43.193423', 'step': 19233, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:43.223733', 'step': 19233, 'epoch': 3} {'type': 'loss', 'content': 0.09166095405817032, 'timestamp': '2025-10-01 04:36:43.225936', 'step': 19234, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:36:43.256322', 'step': 19234, 'epoch': 3} {'type': 'loss', 'content': 0.10706423968076706, 'timestamp': '2025-10-01 04:36:43.258404', 'step': 19235, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:43.288915', 'step': 19235, 'epoch': 3} {'type': 'loss', 'content': 0.058410827070474625, 'timestamp': '2025-10-01 04:36:43.312539', 'step': 19236, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:36:43.344204', 'step': 19236, 'epoch': 3} {'type': 'loss', 'content': 0.14491568505764008, 'timestamp': '2025-10-01 04:36:43.346421', 'step': 19237, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:36:43.376705', 'step': 19237, 'epoch': 3} {'type': 'loss', 'content': 0.051605433225631714, 'timestamp': '2025-10-01 04:36:43.379708', 'step': 19238, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:43.411148', 'step': 19238, 'epoch': 3} {'type': 'loss', 'content': 0.08317524939775467, 'timestamp': '2025-10-01 04:36:43.418468', 'step': 19239, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:43.449573', 'step': 19239, 'epoch': 3} {'type': 'loss', 'content': 0.07999039441347122, 'timestamp': '2025-10-01 04:36:43.473332', 'step': 19240, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:43.503894', 'step': 19240, 'epoch': 3} {'type': 'loss', 'content': 0.05962413549423218, 'timestamp': '2025-10-01 04:36:43.510318', 'step': 19241, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:43.543633', 'step': 19241, 'epoch': 3} {'type': 'loss', 'content': 0.08464597910642624, 'timestamp': '2025-10-01 04:36:43.545973', 'step': 19242, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:43.577965', 'step': 19242, 'epoch': 3} {'type': 'loss', 'content': 0.09995214641094208, 'timestamp': '2025-10-01 04:36:43.580267', 'step': 19243, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:36:43.619067', 'step': 19243, 'epoch': 3} {'type': 'loss', 'content': 0.05215385556221008, 'timestamp': '2025-10-01 04:36:43.643017', 'step': 19244, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:36:43.674979', 'step': 19244, 'epoch': 3} {'type': 'loss', 'content': 0.03994634002447128, 'timestamp': '2025-10-01 04:36:43.677134', 'step': 19245, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:43.709514', 'step': 19245, 'epoch': 3} {'type': 'loss', 'content': 0.08937830477952957, 'timestamp': '2025-10-01 04:36:43.711601', 'step': 19246, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:43.746030', 'step': 19246, 'epoch': 3} {'type': 'loss', 'content': 0.053181860595941544, 'timestamp': '2025-10-01 04:36:43.748357', 'step': 19247, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:43.780035', 'step': 19247, 'epoch': 3} {'type': 'loss', 'content': 0.05162075161933899, 'timestamp': '2025-10-01 04:36:43.803733', 'step': 19248, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:43.840925', 'step': 19248, 'epoch': 3} {'type': 'loss', 'content': 0.10630220919847488, 'timestamp': '2025-10-01 04:36:43.844724', 'step': 19249, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:36:43.876064', 'step': 19249, 'epoch': 3} {'type': 'loss', 'content': 0.11376559734344482, 'timestamp': '2025-10-01 04:36:43.878051', 'step': 19250, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:43.910048', 'step': 19250, 'epoch': 3} {'type': 'loss', 'content': 0.12312179058790207, 'timestamp': '2025-10-01 04:36:43.912213', 'step': 19251, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:43.954174', 'step': 19251, 'epoch': 3} {'type': 'loss', 'content': 0.0754244402050972, 'timestamp': '2025-10-01 04:36:43.977669', 'step': 19252, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:44.008272', 'step': 19252, 'epoch': 3} {'type': 'loss', 'content': 0.1066155880689621, 'timestamp': '2025-10-01 04:36:44.010570', 'step': 19253, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:44.041705', 'step': 19253, 'epoch': 3} {'type': 'loss', 'content': 0.04180543124675751, 'timestamp': '2025-10-01 04:36:44.043800', 'step': 19254, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:44.074994', 'step': 19254, 'epoch': 3} {'type': 'loss', 'content': 0.05628632754087448, 'timestamp': '2025-10-01 04:36:44.077255', 'step': 19255, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:36:44.109162', 'step': 19255, 'epoch': 3} {'type': 'loss', 'content': 0.08970906585454941, 'timestamp': '2025-10-01 04:36:44.132984', 'step': 19256, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:44.165596', 'step': 19256, 'epoch': 3} {'type': 'loss', 'content': 0.08150149881839752, 'timestamp': '2025-10-01 04:36:44.167860', 'step': 19257, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:44.199195', 'step': 19257, 'epoch': 3} {'type': 'loss', 'content': 0.05599924176931381, 'timestamp': '2025-10-01 04:36:44.201506', 'step': 19258, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:44.234199', 'step': 19258, 'epoch': 3} {'type': 'loss', 'content': 0.12139368802309036, 'timestamp': '2025-10-01 04:36:44.236435', 'step': 19259, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:44.266681', 'step': 19259, 'epoch': 3} {'type': 'loss', 'content': 0.07315173745155334, 'timestamp': '2025-10-01 04:36:44.290444', 'step': 19260, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:36:44.325379', 'step': 19260, 'epoch': 3} {'type': 'loss', 'content': 0.07911916077136993, 'timestamp': '2025-10-01 04:36:44.327626', 'step': 19261, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:36:44.359826', 'step': 19261, 'epoch': 3} {'type': 'loss', 'content': 0.059416238218545914, 'timestamp': '2025-10-01 04:36:44.362413', 'step': 19262, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:44.393031', 'step': 19262, 'epoch': 3} {'type': 'loss', 'content': 0.07200629264116287, 'timestamp': '2025-10-01 04:36:44.395297', 'step': 19263, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:44.426676', 'step': 19263, 'epoch': 3} {'type': 'loss', 'content': 0.08026522397994995, 'timestamp': '2025-10-01 04:36:44.450116', 'step': 19264, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:44.482113', 'step': 19264, 'epoch': 3} {'type': 'loss', 'content': 0.03114612214267254, 'timestamp': '2025-10-01 04:36:44.484613', 'step': 19265, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:44.516143', 'step': 19265, 'epoch': 3} {'type': 'loss', 'content': 0.10503017902374268, 'timestamp': '2025-10-01 04:36:44.518772', 'step': 19266, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:36:44.551111', 'step': 19266, 'epoch': 3} {'type': 'loss', 'content': 0.12238529324531555, 'timestamp': '2025-10-01 04:36:44.553268', 'step': 19267, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:44.586330', 'step': 19267, 'epoch': 3} {'type': 'loss', 'content': 0.08857297897338867, 'timestamp': '2025-10-01 04:36:44.610344', 'step': 19268, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:36:44.648708', 'step': 19268, 'epoch': 3} {'type': 'loss', 'content': 0.05336011201143265, 'timestamp': '2025-10-01 04:36:44.651145', 'step': 19269, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:44.683288', 'step': 19269, 'epoch': 3} {'type': 'loss', 'content': 0.09845142811536789, 'timestamp': '2025-10-01 04:36:44.699793', 'step': 19270, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:36:44.734538', 'step': 19270, 'epoch': 3} {'type': 'loss', 'content': 0.04264746978878975, 'timestamp': '2025-10-01 04:36:44.737086', 'step': 19271, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:44.769567', 'step': 19271, 'epoch': 3} {'type': 'loss', 'content': 0.09892411530017853, 'timestamp': '2025-10-01 04:36:44.793572', 'step': 19272, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:44.829120', 'step': 19272, 'epoch': 3} {'type': 'loss', 'content': 0.06648653745651245, 'timestamp': '2025-10-01 04:36:44.832491', 'step': 19273, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:44.870500', 'step': 19273, 'epoch': 3} {'type': 'loss', 'content': 0.059226080775260925, 'timestamp': '2025-10-01 04:36:44.873149', 'step': 19274, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:44.908202', 'step': 19274, 'epoch': 3} {'type': 'loss', 'content': 0.08573812991380692, 'timestamp': '2025-10-01 04:36:44.910507', 'step': 19275, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:36:44.944552', 'step': 19275, 'epoch': 3} {'type': 'loss', 'content': 0.0309023205190897, 'timestamp': '2025-10-01 04:36:44.968319', 'step': 19276, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:45.001677', 'step': 19276, 'epoch': 3} {'type': 'loss', 'content': 0.07865788042545319, 'timestamp': '2025-10-01 04:36:45.003563', 'step': 19277, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:45.034811', 'step': 19277, 'epoch': 3} {'type': 'loss', 'content': 0.09825026988983154, 'timestamp': '2025-10-01 04:36:45.037106', 'step': 19278, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:45.068823', 'step': 19278, 'epoch': 3} {'type': 'loss', 'content': 0.08843287080526352, 'timestamp': '2025-10-01 04:36:45.071683', 'step': 19279, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:36:45.102921', 'step': 19279, 'epoch': 3} {'type': 'loss', 'content': 0.018720993772149086, 'timestamp': '2025-10-01 04:36:45.126800', 'step': 19280, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:36:45.157873', 'step': 19280, 'epoch': 3} {'type': 'loss', 'content': 0.029799235984683037, 'timestamp': '2025-10-01 04:36:45.162075', 'step': 19281, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:36:45.194685', 'step': 19281, 'epoch': 3} {'type': 'loss', 'content': 0.07172457873821259, 'timestamp': '2025-10-01 04:36:45.196820', 'step': 19282, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:45.227245', 'step': 19282, 'epoch': 3} {'type': 'loss', 'content': 0.0483473502099514, 'timestamp': '2025-10-01 04:36:45.230116', 'step': 19283, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:36:45.265397', 'step': 19283, 'epoch': 3} {'type': 'loss', 'content': 0.06972123682498932, 'timestamp': '2025-10-01 04:36:45.289814', 'step': 19284, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:45.320810', 'step': 19284, 'epoch': 3} {'type': 'loss', 'content': 0.13067685067653656, 'timestamp': '2025-10-01 04:36:45.326058', 'step': 19285, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:45.360595', 'step': 19285, 'epoch': 3} {'type': 'loss', 'content': 0.09070131927728653, 'timestamp': '2025-10-01 04:36:45.363002', 'step': 19286, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:45.394355', 'step': 19286, 'epoch': 3} {'type': 'loss', 'content': 0.04041974991559982, 'timestamp': '2025-10-01 04:36:45.397086', 'step': 19287, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:36:45.427676', 'step': 19287, 'epoch': 3} {'type': 'loss', 'content': 0.14431767165660858, 'timestamp': '2025-10-01 04:36:45.452278', 'step': 19288, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:45.483243', 'step': 19288, 'epoch': 3} {'type': 'loss', 'content': 0.06340393424034119, 'timestamp': '2025-10-01 04:36:45.485598', 'step': 19289, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:36:45.516707', 'step': 19289, 'epoch': 3} {'type': 'loss', 'content': 0.14347873628139496, 'timestamp': '2025-10-01 04:36:45.519236', 'step': 19290, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:45.549678', 'step': 19290, 'epoch': 3} {'type': 'loss', 'content': 0.09300177544355392, 'timestamp': '2025-10-01 04:36:45.551963', 'step': 19291, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:36:45.582729', 'step': 19291, 'epoch': 3} {'type': 'loss', 'content': 0.13072654604911804, 'timestamp': '2025-10-01 04:36:45.606800', 'step': 19292, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:45.638144', 'step': 19292, 'epoch': 3} {'type': 'loss', 'content': 0.07019835710525513, 'timestamp': '2025-10-01 04:36:45.640890', 'step': 19293, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:36:45.681228', 'step': 19293, 'epoch': 3} {'type': 'loss', 'content': 0.10112942010164261, 'timestamp': '2025-10-01 04:36:45.683449', 'step': 19294, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:36:45.721351', 'step': 19294, 'epoch': 3} {'type': 'loss', 'content': 0.07704858481884003, 'timestamp': '2025-10-01 04:36:45.727188', 'step': 19295, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:45.758591', 'step': 19295, 'epoch': 3} {'type': 'loss', 'content': 0.024842385202646255, 'timestamp': '2025-10-01 04:36:45.782815', 'step': 19296, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:36:45.815535', 'step': 19296, 'epoch': 3} {'type': 'loss', 'content': 0.019721264019608498, 'timestamp': '2025-10-01 04:36:45.817938', 'step': 19297, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:45.849246', 'step': 19297, 'epoch': 3} {'type': 'loss', 'content': 0.03948826715350151, 'timestamp': '2025-10-01 04:36:45.851573', 'step': 19298, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:36:45.883375', 'step': 19298, 'epoch': 3} {'type': 'loss', 'content': 0.08712306618690491, 'timestamp': '2025-10-01 04:36:45.886345', 'step': 19299, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:45.917876', 'step': 19299, 'epoch': 3} {'type': 'loss', 'content': 0.059359464794397354, 'timestamp': '2025-10-01 04:36:45.943721', 'step': 19300, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:36:45.978063', 'step': 19300, 'epoch': 3} {'type': 'loss', 'content': 0.06438526511192322, 'timestamp': '2025-10-01 04:36:45.980375', 'step': 19301, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:46.012090', 'step': 19301, 'epoch': 3} {'type': 'loss', 'content': 0.10674270242452621, 'timestamp': '2025-10-01 04:36:46.014331', 'step': 19302, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:46.047320', 'step': 19302, 'epoch': 3} {'type': 'loss', 'content': 0.04930821806192398, 'timestamp': '2025-10-01 04:36:46.055966', 'step': 19303, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:46.089323', 'step': 19303, 'epoch': 3} {'type': 'loss', 'content': 0.03174830600619316, 'timestamp': '2025-10-01 04:36:46.113725', 'step': 19304, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:46.148089', 'step': 19304, 'epoch': 3} {'type': 'loss', 'content': 0.1394958198070526, 'timestamp': '2025-10-01 04:36:46.150048', 'step': 19305, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:46.182537', 'step': 19305, 'epoch': 3} {'type': 'loss', 'content': 0.09053079038858414, 'timestamp': '2025-10-01 04:36:46.185426', 'step': 19306, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:46.221559', 'step': 19306, 'epoch': 3} {'type': 'loss', 'content': 0.058938413858413696, 'timestamp': '2025-10-01 04:36:46.223816', 'step': 19307, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:46.267877', 'step': 19307, 'epoch': 3} {'type': 'loss', 'content': 0.029913073405623436, 'timestamp': '2025-10-01 04:36:46.291862', 'step': 19308, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:46.334846', 'step': 19308, 'epoch': 3} {'type': 'loss', 'content': 0.13018791377544403, 'timestamp': '2025-10-01 04:36:46.337510', 'step': 19309, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:46.369603', 'step': 19309, 'epoch': 3} {'type': 'loss', 'content': 0.021705573424696922, 'timestamp': '2025-10-01 04:36:46.372215', 'step': 19310, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:36:46.404305', 'step': 19310, 'epoch': 3} {'type': 'loss', 'content': 0.07773500680923462, 'timestamp': '2025-10-01 04:36:46.407782', 'step': 19311, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:36:46.439604', 'step': 19311, 'epoch': 3} {'type': 'loss', 'content': 0.0875377357006073, 'timestamp': '2025-10-01 04:36:46.463626', 'step': 19312, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:46.494918', 'step': 19312, 'epoch': 3} {'type': 'loss', 'content': 0.01377629954367876, 'timestamp': '2025-10-01 04:36:46.497385', 'step': 19313, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:46.530267', 'step': 19313, 'epoch': 3} {'type': 'loss', 'content': 0.10890679806470871, 'timestamp': '2025-10-01 04:36:46.532464', 'step': 19314, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:46.564555', 'step': 19314, 'epoch': 3} {'type': 'loss', 'content': 0.10816031694412231, 'timestamp': '2025-10-01 04:36:46.567012', 'step': 19315, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:46.599942', 'step': 19315, 'epoch': 3} {'type': 'loss', 'content': 0.11564729362726212, 'timestamp': '2025-10-01 04:36:46.624607', 'step': 19316, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:36:46.660480', 'step': 19316, 'epoch': 3} {'type': 'loss', 'content': 0.08390211313962936, 'timestamp': '2025-10-01 04:36:46.662769', 'step': 19317, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:46.696809', 'step': 19317, 'epoch': 3} {'type': 'loss', 'content': 0.11647389084100723, 'timestamp': '2025-10-01 04:36:46.699915', 'step': 19318, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:46.735522', 'step': 19318, 'epoch': 3} {'type': 'loss', 'content': 0.056000418961048126, 'timestamp': '2025-10-01 04:36:46.737912', 'step': 19319, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:46.769966', 'step': 19319, 'epoch': 3} {'type': 'loss', 'content': 0.08782674372196198, 'timestamp': '2025-10-01 04:36:46.799433', 'step': 19320, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:46.830797', 'step': 19320, 'epoch': 3} {'type': 'loss', 'content': 0.10249508172273636, 'timestamp': '2025-10-01 04:36:46.834200', 'step': 19321, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:36:46.865785', 'step': 19321, 'epoch': 3} {'type': 'loss', 'content': 0.1395667940378189, 'timestamp': '2025-10-01 04:36:46.868134', 'step': 19322, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:36:46.900067', 'step': 19322, 'epoch': 3} {'type': 'loss', 'content': 0.0418027900159359, 'timestamp': '2025-10-01 04:36:46.902280', 'step': 19323, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:46.933116', 'step': 19323, 'epoch': 3} {'type': 'loss', 'content': 0.043159693479537964, 'timestamp': '2025-10-01 04:36:46.964800', 'step': 19324, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:47.005231', 'step': 19324, 'epoch': 3} {'type': 'loss', 'content': 0.02765739895403385, 'timestamp': '2025-10-01 04:36:47.008593', 'step': 19325, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:36:47.039589', 'step': 19325, 'epoch': 3} {'type': 'loss', 'content': 0.10704337060451508, 'timestamp': '2025-10-01 04:36:47.042035', 'step': 19326, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:47.073044', 'step': 19326, 'epoch': 3} {'type': 'loss', 'content': 0.047302231192588806, 'timestamp': '2025-10-01 04:36:47.075559', 'step': 19327, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:36:47.106895', 'step': 19327, 'epoch': 3} {'type': 'loss', 'content': 0.1149609386920929, 'timestamp': '2025-10-01 04:36:47.130719', 'step': 19328, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:36:47.162210', 'step': 19328, 'epoch': 3} {'type': 'loss', 'content': 0.04082249477505684, 'timestamp': '2025-10-01 04:36:47.164703', 'step': 19329, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:47.203686', 'step': 19329, 'epoch': 3} {'type': 'loss', 'content': 0.03844001516699791, 'timestamp': '2025-10-01 04:36:47.206098', 'step': 19330, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:36:47.238220', 'step': 19330, 'epoch': 3} {'type': 'loss', 'content': 0.1384543776512146, 'timestamp': '2025-10-01 04:36:47.240864', 'step': 19331, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:36:47.272084', 'step': 19331, 'epoch': 3} {'type': 'loss', 'content': 0.04888831451535225, 'timestamp': '2025-10-01 04:36:47.295859', 'step': 19332, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:36:47.331681', 'step': 19332, 'epoch': 3} {'type': 'loss', 'content': 0.08647700399160385, 'timestamp': '2025-10-01 04:36:47.334315', 'step': 19333, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:47.381334', 'step': 19333, 'epoch': 3} {'type': 'loss', 'content': 0.07562422752380371, 'timestamp': '2025-10-01 04:36:47.383658', 'step': 19334, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:47.414811', 'step': 19334, 'epoch': 3} {'type': 'loss', 'content': 0.013346155174076557, 'timestamp': '2025-10-01 04:36:47.417206', 'step': 19335, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:47.447889', 'step': 19335, 'epoch': 3} {'type': 'loss', 'content': 0.07995251566171646, 'timestamp': '2025-10-01 04:36:47.471646', 'step': 19336, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:47.513347', 'step': 19336, 'epoch': 3} {'type': 'loss', 'content': 0.06263759732246399, 'timestamp': '2025-10-01 04:36:47.515651', 'step': 19337, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:47.547250', 'step': 19337, 'epoch': 3} {'type': 'loss', 'content': 0.101728156208992, 'timestamp': '2025-10-01 04:36:47.549636', 'step': 19338, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:47.580854', 'step': 19338, 'epoch': 3} {'type': 'loss', 'content': 0.12554283440113068, 'timestamp': '2025-10-01 04:36:47.583131', 'step': 19339, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:47.613916', 'step': 19339, 'epoch': 3} {'type': 'loss', 'content': 0.09045473486185074, 'timestamp': '2025-10-01 04:36:47.637959', 'step': 19340, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:47.669924', 'step': 19340, 'epoch': 3} {'type': 'loss', 'content': 0.0933312475681305, 'timestamp': '2025-10-01 04:36:47.672510', 'step': 19341, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:36:47.704010', 'step': 19341, 'epoch': 3} {'type': 'loss', 'content': 0.06089845672249794, 'timestamp': '2025-10-01 04:36:47.706848', 'step': 19342, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:47.737482', 'step': 19342, 'epoch': 3} {'type': 'loss', 'content': 0.08545634895563126, 'timestamp': '2025-10-01 04:36:47.740038', 'step': 19343, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:36:47.772274', 'step': 19343, 'epoch': 3} {'type': 'loss', 'content': 0.08617090433835983, 'timestamp': '2025-10-01 04:36:47.796054', 'step': 19344, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:47.826797', 'step': 19344, 'epoch': 3} {'type': 'loss', 'content': 0.05500674992799759, 'timestamp': '2025-10-01 04:36:47.834747', 'step': 19345, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:47.868911', 'step': 19345, 'epoch': 3} {'type': 'loss', 'content': 0.12666888535022736, 'timestamp': '2025-10-01 04:36:47.871305', 'step': 19346, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:36:47.903355', 'step': 19346, 'epoch': 3} {'type': 'loss', 'content': 0.04455767944455147, 'timestamp': '2025-10-01 04:36:47.905493', 'step': 19347, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:47.936066', 'step': 19347, 'epoch': 3} {'type': 'loss', 'content': 0.029447128996253014, 'timestamp': '2025-10-01 04:36:47.959810', 'step': 19348, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:47.990867', 'step': 19348, 'epoch': 3} {'type': 'loss', 'content': 0.07289852946996689, 'timestamp': '2025-10-01 04:36:47.994087', 'step': 19349, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:48.024624', 'step': 19349, 'epoch': 3} {'type': 'loss', 'content': 0.06365405768156052, 'timestamp': '2025-10-01 04:36:48.026872', 'step': 19350, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:36:48.057474', 'step': 19350, 'epoch': 3} {'type': 'loss', 'content': 0.06539326161146164, 'timestamp': '2025-10-01 04:36:48.059565', 'step': 19351, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:48.089543', 'step': 19351, 'epoch': 3} {'type': 'loss', 'content': 0.07336041331291199, 'timestamp': '2025-10-01 04:36:48.113426', 'step': 19352, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:48.152543', 'step': 19352, 'epoch': 3} {'type': 'loss', 'content': 0.053728967905044556, 'timestamp': '2025-10-01 04:36:48.158523', 'step': 19353, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:48.194697', 'step': 19353, 'epoch': 3} {'type': 'loss', 'content': 0.040975674986839294, 'timestamp': '2025-10-01 04:36:48.197603', 'step': 19354, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:48.240818', 'step': 19354, 'epoch': 3} {'type': 'loss', 'content': 0.1443730890750885, 'timestamp': '2025-10-01 04:36:48.243403', 'step': 19355, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:48.274145', 'step': 19355, 'epoch': 3} {'type': 'loss', 'content': 0.043813556432724, 'timestamp': '2025-10-01 04:36:48.297847', 'step': 19356, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:48.329755', 'step': 19356, 'epoch': 3} {'type': 'loss', 'content': 0.12367206811904907, 'timestamp': '2025-10-01 04:36:48.338898', 'step': 19357, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:48.370393', 'step': 19357, 'epoch': 3} {'type': 'loss', 'content': 0.05770699307322502, 'timestamp': '2025-10-01 04:36:48.383324', 'step': 19358, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:36:48.422736', 'step': 19358, 'epoch': 3} {'type': 'loss', 'content': 0.04925825074315071, 'timestamp': '2025-10-01 04:36:48.425285', 'step': 19359, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:48.457416', 'step': 19359, 'epoch': 3} {'type': 'loss', 'content': 0.04120384156703949, 'timestamp': '2025-10-01 04:36:48.481211', 'step': 19360, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:36:48.516055', 'step': 19360, 'epoch': 3} {'type': 'loss', 'content': 0.12470356374979019, 'timestamp': '2025-10-01 04:36:48.518344', 'step': 19361, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:48.549383', 'step': 19361, 'epoch': 3} {'type': 'loss', 'content': 0.09931488335132599, 'timestamp': '2025-10-01 04:36:48.552376', 'step': 19362, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:48.595050', 'step': 19362, 'epoch': 3} {'type': 'loss', 'content': 0.07375478744506836, 'timestamp': '2025-10-01 04:36:48.597266', 'step': 19363, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:48.628308', 'step': 19363, 'epoch': 3} {'type': 'loss', 'content': 0.09011495858430862, 'timestamp': '2025-10-01 04:36:48.651968', 'step': 19364, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:36:48.684203', 'step': 19364, 'epoch': 3} {'type': 'loss', 'content': 0.08117225766181946, 'timestamp': '2025-10-01 04:36:48.691750', 'step': 19365, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:48.724813', 'step': 19365, 'epoch': 3} {'type': 'loss', 'content': 0.05633845552802086, 'timestamp': '2025-10-01 04:36:48.731663', 'step': 19366, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:48.772821', 'step': 19366, 'epoch': 3} {'type': 'loss', 'content': 0.030950261279940605, 'timestamp': '2025-10-01 04:36:48.775523', 'step': 19367, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:48.807923', 'step': 19367, 'epoch': 3} {'type': 'loss', 'content': 0.10075020045042038, 'timestamp': '2025-10-01 04:36:48.831686', 'step': 19368, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:36:48.863202', 'step': 19368, 'epoch': 3} {'type': 'loss', 'content': 0.041805073618888855, 'timestamp': '2025-10-01 04:36:48.868424', 'step': 19369, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:36:48.899022', 'step': 19369, 'epoch': 3} {'type': 'loss', 'content': 0.0964030846953392, 'timestamp': '2025-10-01 04:36:48.901299', 'step': 19370, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:48.932020', 'step': 19370, 'epoch': 3} {'type': 'loss', 'content': 0.04821432754397392, 'timestamp': '2025-10-01 04:36:48.935312', 'step': 19371, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:36:48.968952', 'step': 19371, 'epoch': 3} {'type': 'loss', 'content': 0.03909752145409584, 'timestamp': '2025-10-01 04:36:48.992962', 'step': 19372, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:49.035681', 'step': 19372, 'epoch': 3} {'type': 'loss', 'content': 0.06088673695921898, 'timestamp': '2025-10-01 04:36:49.039379', 'step': 19373, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:49.086233', 'step': 19373, 'epoch': 3} {'type': 'loss', 'content': 0.07367536425590515, 'timestamp': '2025-10-01 04:36:49.088534', 'step': 19374, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:36:49.119520', 'step': 19374, 'epoch': 3} {'type': 'loss', 'content': 0.11453605443239212, 'timestamp': '2025-10-01 04:36:49.121986', 'step': 19375, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:49.155316', 'step': 19375, 'epoch': 3} {'type': 'loss', 'content': 0.05920359119772911, 'timestamp': '2025-10-01 04:36:49.179230', 'step': 19376, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:49.210381', 'step': 19376, 'epoch': 3} {'type': 'loss', 'content': 0.049378253519535065, 'timestamp': '2025-10-01 04:36:49.213340', 'step': 19377, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:49.244163', 'step': 19377, 'epoch': 3} {'type': 'loss', 'content': 0.06277591735124588, 'timestamp': '2025-10-01 04:36:49.247114', 'step': 19378, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:49.280294', 'step': 19378, 'epoch': 3} {'type': 'loss', 'content': 0.03209562972187996, 'timestamp': '2025-10-01 04:36:49.283074', 'step': 19379, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:49.314923', 'step': 19379, 'epoch': 3} {'type': 'loss', 'content': 0.09844889491796494, 'timestamp': '2025-10-01 04:36:49.339099', 'step': 19380, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:49.370298', 'step': 19380, 'epoch': 3} {'type': 'loss', 'content': 0.11822355538606644, 'timestamp': '2025-10-01 04:36:49.379316', 'step': 19381, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:49.411101', 'step': 19381, 'epoch': 3} {'type': 'loss', 'content': 0.051871296018362045, 'timestamp': '2025-10-01 04:36:49.413855', 'step': 19382, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:49.445001', 'step': 19382, 'epoch': 3} {'type': 'loss', 'content': 0.11175761371850967, 'timestamp': '2025-10-01 04:36:49.447341', 'step': 19383, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:49.478792', 'step': 19383, 'epoch': 3} {'type': 'loss', 'content': 0.0919308215379715, 'timestamp': '2025-10-01 04:36:49.503423', 'step': 19384, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:49.534891', 'step': 19384, 'epoch': 3} {'type': 'loss', 'content': 0.024606676772236824, 'timestamp': '2025-10-01 04:36:49.537773', 'step': 19385, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:49.571046', 'step': 19385, 'epoch': 3} {'type': 'loss', 'content': 0.08216754347085953, 'timestamp': '2025-10-01 04:36:49.574505', 'step': 19386, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:36:49.615791', 'step': 19386, 'epoch': 3} {'type': 'loss', 'content': 0.08426231145858765, 'timestamp': '2025-10-01 04:36:49.618714', 'step': 19387, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-10-01 04:36:49.661796', 'step': 19387, 'epoch': 3} {'type': 'loss', 'content': 0.07422173023223877, 'timestamp': '2025-10-01 04:36:49.689907', 'step': 19388, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:49.721252', 'step': 19388, 'epoch': 3} {'type': 'loss', 'content': 0.012971815653145313, 'timestamp': '2025-10-01 04:36:49.723624', 'step': 19389, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:36:49.756507', 'step': 19389, 'epoch': 3} {'type': 'loss', 'content': 0.048244819045066833, 'timestamp': '2025-10-01 04:36:49.758907', 'step': 19390, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:49.789691', 'step': 19390, 'epoch': 3} {'type': 'loss', 'content': 0.06637648493051529, 'timestamp': '2025-10-01 04:36:49.792506', 'step': 19391, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:49.823392', 'step': 19391, 'epoch': 3} {'type': 'loss', 'content': 0.057717472314834595, 'timestamp': '2025-10-01 04:36:49.854112', 'step': 19392, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:49.902802', 'step': 19392, 'epoch': 3} {'type': 'loss', 'content': 0.1314341425895691, 'timestamp': '2025-10-01 04:36:49.905586', 'step': 19393, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:36:49.936866', 'step': 19393, 'epoch': 3} {'type': 'loss', 'content': 0.08536132425069809, 'timestamp': '2025-10-01 04:36:49.939208', 'step': 19394, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:49.973106', 'step': 19394, 'epoch': 3} {'type': 'loss', 'content': 0.09073611348867416, 'timestamp': '2025-10-01 04:36:49.975584', 'step': 19395, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:36:50.006998', 'step': 19395, 'epoch': 3} {'type': 'loss', 'content': 0.01936749741435051, 'timestamp': '2025-10-01 04:36:50.030988', 'step': 19396, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:50.062329', 'step': 19396, 'epoch': 3} {'type': 'loss', 'content': 0.03154747188091278, 'timestamp': '2025-10-01 04:36:50.064725', 'step': 19397, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:50.097338', 'step': 19397, 'epoch': 3} {'type': 'loss', 'content': 0.05259448662400246, 'timestamp': '2025-10-01 04:36:50.108504', 'step': 19398, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:50.140474', 'step': 19398, 'epoch': 3} {'type': 'loss', 'content': 0.04884004220366478, 'timestamp': '2025-10-01 04:36:50.142779', 'step': 19399, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:36:50.174537', 'step': 19399, 'epoch': 3} {'type': 'loss', 'content': 0.12204763293266296, 'timestamp': '2025-10-01 04:36:50.198978', 'step': 19400, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:50.234868', 'step': 19400, 'epoch': 3} {'type': 'loss', 'content': 0.10996852070093155, 'timestamp': '2025-10-01 04:36:50.237864', 'step': 19401, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:50.269581', 'step': 19401, 'epoch': 3} {'type': 'loss', 'content': 0.02144126407802105, 'timestamp': '2025-10-01 04:36:50.272569', 'step': 19402, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:50.303850', 'step': 19402, 'epoch': 3} {'type': 'loss', 'content': 0.05013708397746086, 'timestamp': '2025-10-01 04:36:50.306452', 'step': 19403, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:50.337476', 'step': 19403, 'epoch': 3} {'type': 'loss', 'content': 0.09089716523885727, 'timestamp': '2025-10-01 04:36:50.366851', 'step': 19404, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:50.397927', 'step': 19404, 'epoch': 3} {'type': 'loss', 'content': 0.014502931386232376, 'timestamp': '2025-10-01 04:36:50.400399', 'step': 19405, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:50.431602', 'step': 19405, 'epoch': 3} {'type': 'loss', 'content': 0.07908733189105988, 'timestamp': '2025-10-01 04:36:50.434001', 'step': 19406, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:36:50.478879', 'step': 19406, 'epoch': 3} {'type': 'loss', 'content': 0.11868689209222794, 'timestamp': '2025-10-01 04:36:50.481387', 'step': 19407, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:36:50.512831', 'step': 19407, 'epoch': 3} {'type': 'loss', 'content': 0.039316173642873764, 'timestamp': '2025-10-01 04:36:50.536861', 'step': 19408, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:50.569524', 'step': 19408, 'epoch': 3} {'type': 'loss', 'content': 0.05870524421334267, 'timestamp': '2025-10-01 04:36:50.572129', 'step': 19409, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:36:50.602904', 'step': 19409, 'epoch': 3} {'type': 'loss', 'content': 0.05029673874378204, 'timestamp': '2025-10-01 04:36:50.607536', 'step': 19410, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:50.643422', 'step': 19410, 'epoch': 3} {'type': 'loss', 'content': 0.05162006616592407, 'timestamp': '2025-10-01 04:36:50.647559', 'step': 19411, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:50.685872', 'step': 19411, 'epoch': 3} {'type': 'loss', 'content': 0.03405046463012695, 'timestamp': '2025-10-01 04:36:50.709721', 'step': 19412, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:50.740110', 'step': 19412, 'epoch': 3} {'type': 'loss', 'content': 0.07670216262340546, 'timestamp': '2025-10-01 04:36:50.742419', 'step': 19413, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:36:50.773377', 'step': 19413, 'epoch': 3} {'type': 'loss', 'content': 0.03707825019955635, 'timestamp': '2025-10-01 04:36:50.775806', 'step': 19414, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:50.808527', 'step': 19414, 'epoch': 3} {'type': 'loss', 'content': 0.15525315701961517, 'timestamp': '2025-10-01 04:36:50.811443', 'step': 19415, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:50.842440', 'step': 19415, 'epoch': 3} {'type': 'loss', 'content': 0.05855143442749977, 'timestamp': '2025-10-01 04:36:50.866929', 'step': 19416, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:36:50.897689', 'step': 19416, 'epoch': 3} {'type': 'loss', 'content': 0.14912626147270203, 'timestamp': '2025-10-01 04:36:50.900553', 'step': 19417, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:50.931367', 'step': 19417, 'epoch': 3} {'type': 'loss', 'content': 0.08629672974348068, 'timestamp': '2025-10-01 04:36:50.935754', 'step': 19418, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:50.967192', 'step': 19418, 'epoch': 3} {'type': 'loss', 'content': 0.07922986894845963, 'timestamp': '2025-10-01 04:36:50.969641', 'step': 19419, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:36:51.002979', 'step': 19419, 'epoch': 3} {'type': 'loss', 'content': 0.03507540747523308, 'timestamp': '2025-10-01 04:36:51.026753', 'step': 19420, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:51.056672', 'step': 19420, 'epoch': 3} {'type': 'loss', 'content': 0.09704601764678955, 'timestamp': '2025-10-01 04:36:51.059222', 'step': 19421, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:36:51.090400', 'step': 19421, 'epoch': 3} {'type': 'loss', 'content': 0.043544258922338486, 'timestamp': '2025-10-01 04:36:51.092645', 'step': 19422, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:51.122715', 'step': 19422, 'epoch': 3} {'type': 'loss', 'content': 0.1397601217031479, 'timestamp': '2025-10-01 04:36:51.125040', 'step': 19423, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:51.156074', 'step': 19423, 'epoch': 3} {'type': 'loss', 'content': 0.041071344166994095, 'timestamp': '2025-10-01 04:36:51.179775', 'step': 19424, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:51.210863', 'step': 19424, 'epoch': 3} {'type': 'loss', 'content': 0.07757239043712616, 'timestamp': '2025-10-01 04:36:51.213209', 'step': 19425, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:51.243794', 'step': 19425, 'epoch': 3} {'type': 'loss', 'content': 0.07646660506725311, 'timestamp': '2025-10-01 04:36:51.247961', 'step': 19426, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:51.288835', 'step': 19426, 'epoch': 3} {'type': 'loss', 'content': 0.0866539254784584, 'timestamp': '2025-10-01 04:36:51.291028', 'step': 19427, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:51.320622', 'step': 19427, 'epoch': 3} {'type': 'loss', 'content': 0.11435633897781372, 'timestamp': '2025-10-01 04:36:51.344390', 'step': 19428, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:36:51.375479', 'step': 19428, 'epoch': 3} {'type': 'loss', 'content': 0.05281204730272293, 'timestamp': '2025-10-01 04:36:51.378029', 'step': 19429, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:51.409841', 'step': 19429, 'epoch': 3} {'type': 'loss', 'content': 0.11832404136657715, 'timestamp': '2025-10-01 04:36:51.412052', 'step': 19430, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:36:51.442728', 'step': 19430, 'epoch': 3} {'type': 'loss', 'content': 0.06677425652742386, 'timestamp': '2025-10-01 04:36:51.445435', 'step': 19431, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:36:51.476682', 'step': 19431, 'epoch': 3} {'type': 'loss', 'content': 0.09564446657896042, 'timestamp': '2025-10-01 04:36:51.500441', 'step': 19432, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:51.531051', 'step': 19432, 'epoch': 3} {'type': 'loss', 'content': 0.1178450956940651, 'timestamp': '2025-10-01 04:36:51.533395', 'step': 19433, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:51.564780', 'step': 19433, 'epoch': 3} {'type': 'loss', 'content': 0.04975321143865585, 'timestamp': '2025-10-01 04:36:51.567053', 'step': 19434, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:51.597685', 'step': 19434, 'epoch': 3} {'type': 'loss', 'content': 0.07128526270389557, 'timestamp': '2025-10-01 04:36:51.599981', 'step': 19435, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:51.630218', 'step': 19435, 'epoch': 3} {'type': 'loss', 'content': 0.08387397974729538, 'timestamp': '2025-10-01 04:36:51.654604', 'step': 19436, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:51.695801', 'step': 19436, 'epoch': 3} {'type': 'loss', 'content': 0.056191325187683105, 'timestamp': '2025-10-01 04:36:51.698092', 'step': 19437, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:51.728658', 'step': 19437, 'epoch': 3} {'type': 'loss', 'content': 0.05669545754790306, 'timestamp': '2025-10-01 04:36:51.731013', 'step': 19438, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:51.761085', 'step': 19438, 'epoch': 3} {'type': 'loss', 'content': 0.06735775619745255, 'timestamp': '2025-10-01 04:36:51.763284', 'step': 19439, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:36:51.793780', 'step': 19439, 'epoch': 3} {'type': 'loss', 'content': 0.09844716638326645, 'timestamp': '2025-10-01 04:36:51.818116', 'step': 19440, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:36:51.848515', 'step': 19440, 'epoch': 3} {'type': 'loss', 'content': 0.042751993983983994, 'timestamp': '2025-10-01 04:36:51.850936', 'step': 19441, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:51.881477', 'step': 19441, 'epoch': 3} {'type': 'loss', 'content': 0.06894103437662125, 'timestamp': '2025-10-01 04:36:51.884765', 'step': 19442, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:51.926803', 'step': 19442, 'epoch': 3} {'type': 'loss', 'content': 0.0604473315179348, 'timestamp': '2025-10-01 04:36:51.929077', 'step': 19443, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:51.959857', 'step': 19443, 'epoch': 3} {'type': 'loss', 'content': 0.04034564644098282, 'timestamp': '2025-10-01 04:36:51.983811', 'step': 19444, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:36:52.016806', 'step': 19444, 'epoch': 3} {'type': 'loss', 'content': 0.02396872267127037, 'timestamp': '2025-10-01 04:36:52.019197', 'step': 19445, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:52.049890', 'step': 19445, 'epoch': 3} {'type': 'loss', 'content': 0.07920591533184052, 'timestamp': '2025-10-01 04:36:52.052377', 'step': 19446, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:36:52.083582', 'step': 19446, 'epoch': 3} {'type': 'loss', 'content': 0.11131853610277176, 'timestamp': '2025-10-01 04:36:52.085745', 'step': 19447, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:36:52.117439', 'step': 19447, 'epoch': 3} {'type': 'loss', 'content': 0.047801222652196884, 'timestamp': '2025-10-01 04:36:52.142122', 'step': 19448, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:36:52.173413', 'step': 19448, 'epoch': 3} {'type': 'loss', 'content': 0.06380897015333176, 'timestamp': '2025-10-01 04:36:52.175763', 'step': 19449, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:36:52.206404', 'step': 19449, 'epoch': 3} {'type': 'loss', 'content': 0.11147603392601013, 'timestamp': '2025-10-01 04:36:52.208764', 'step': 19450, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:36:52.239434', 'step': 19450, 'epoch': 3} {'type': 'loss', 'content': 0.05032737925648689, 'timestamp': '2025-10-01 04:36:52.242215', 'step': 19451, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:36:52.273931', 'step': 19451, 'epoch': 3} {'type': 'loss', 'content': 0.08987561613321304, 'timestamp': '2025-10-01 04:36:52.297754', 'step': 19452, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:36:52.340847', 'step': 19452, 'epoch': 3} {'type': 'loss', 'content': 0.0320722870528698, 'timestamp': '2025-10-01 04:36:52.343071', 'step': 19453, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:52.373855', 'step': 19453, 'epoch': 3} {'type': 'loss', 'content': 0.16169342398643494, 'timestamp': '2025-10-01 04:36:52.376183', 'step': 19454, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:52.407121', 'step': 19454, 'epoch': 3} {'type': 'loss', 'content': 0.08440007269382477, 'timestamp': '2025-10-01 04:36:52.409498', 'step': 19455, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:52.439694', 'step': 19455, 'epoch': 3} {'type': 'loss', 'content': 0.09502943605184555, 'timestamp': '2025-10-01 04:36:52.463582', 'step': 19456, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:52.509912', 'step': 19456, 'epoch': 3} {'type': 'loss', 'content': 0.057327207177877426, 'timestamp': '2025-10-01 04:36:52.512122', 'step': 19457, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:36:52.542565', 'step': 19457, 'epoch': 3} {'type': 'loss', 'content': 0.09409231692552567, 'timestamp': '2025-10-01 04:36:52.548852', 'step': 19458, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:52.580001', 'step': 19458, 'epoch': 3} {'type': 'loss', 'content': 0.06359166651964188, 'timestamp': '2025-10-01 04:36:52.583518', 'step': 19459, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:52.616380', 'step': 19459, 'epoch': 3} {'type': 'loss', 'content': 0.0583493746817112, 'timestamp': '2025-10-01 04:36:52.640137', 'step': 19460, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:52.671578', 'step': 19460, 'epoch': 3} {'type': 'loss', 'content': 0.09689921140670776, 'timestamp': '2025-10-01 04:36:52.673876', 'step': 19461, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:36:52.708807', 'step': 19461, 'epoch': 3} {'type': 'loss', 'content': 0.0636981800198555, 'timestamp': '2025-10-01 04:36:52.713025', 'step': 19462, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:52.745115', 'step': 19462, 'epoch': 3} {'type': 'loss', 'content': 0.16312959790229797, 'timestamp': '2025-10-01 04:36:52.747419', 'step': 19463, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:52.779437', 'step': 19463, 'epoch': 3} {'type': 'loss', 'content': 0.02841690555214882, 'timestamp': '2025-10-01 04:36:52.804007', 'step': 19464, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:52.836840', 'step': 19464, 'epoch': 3} {'type': 'loss', 'content': 0.12713229656219482, 'timestamp': '2025-10-01 04:36:52.839776', 'step': 19465, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:52.878729', 'step': 19465, 'epoch': 3} {'type': 'loss', 'content': 0.055790264159440994, 'timestamp': '2025-10-01 04:36:52.886029', 'step': 19466, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:52.917961', 'step': 19466, 'epoch': 3} {'type': 'loss', 'content': 0.07335633784532547, 'timestamp': '2025-10-01 04:36:52.937151', 'step': 19467, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:52.969324', 'step': 19467, 'epoch': 3} {'type': 'loss', 'content': 0.09505722671747208, 'timestamp': '2025-10-01 04:36:52.993136', 'step': 19468, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:36:53.026588', 'step': 19468, 'epoch': 3} {'type': 'loss', 'content': 0.058745741844177246, 'timestamp': '2025-10-01 04:36:53.029017', 'step': 19469, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:36:53.067048', 'step': 19469, 'epoch': 3} {'type': 'loss', 'content': 0.11519130319356918, 'timestamp': '2025-10-01 04:36:53.069224', 'step': 19470, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:53.102984', 'step': 19470, 'epoch': 3} {'type': 'loss', 'content': 0.01703416369855404, 'timestamp': '2025-10-01 04:36:53.105421', 'step': 19471, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:36:53.136937', 'step': 19471, 'epoch': 3} {'type': 'loss', 'content': 0.030850285664200783, 'timestamp': '2025-10-01 04:36:53.162335', 'step': 19472, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:53.193504', 'step': 19472, 'epoch': 3} {'type': 'loss', 'content': 0.057059504091739655, 'timestamp': '2025-10-01 04:36:53.196473', 'step': 19473, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:53.239401', 'step': 19473, 'epoch': 3} {'type': 'loss', 'content': 0.05898023024201393, 'timestamp': '2025-10-01 04:36:53.242107', 'step': 19474, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:53.274202', 'step': 19474, 'epoch': 3} {'type': 'loss', 'content': 0.0773761048913002, 'timestamp': '2025-10-01 04:36:53.279102', 'step': 19475, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:36:53.317044', 'step': 19475, 'epoch': 3} {'type': 'loss', 'content': 0.03735686466097832, 'timestamp': '2025-10-01 04:36:53.341011', 'step': 19476, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:53.372653', 'step': 19476, 'epoch': 3} {'type': 'loss', 'content': 0.10001137852668762, 'timestamp': '2025-10-01 04:36:53.377914', 'step': 19477, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:36:53.421440', 'step': 19477, 'epoch': 3} {'type': 'loss', 'content': 0.05780961737036705, 'timestamp': '2025-10-01 04:36:53.423943', 'step': 19478, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:36:53.456569', 'step': 19478, 'epoch': 3} {'type': 'loss', 'content': 0.033977068960666656, 'timestamp': '2025-10-01 04:36:53.459066', 'step': 19479, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:53.490997', 'step': 19479, 'epoch': 3} {'type': 'loss', 'content': 0.049444831907749176, 'timestamp': '2025-10-01 04:36:53.514642', 'step': 19480, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:53.545615', 'step': 19480, 'epoch': 3} {'type': 'loss', 'content': 0.031062312424182892, 'timestamp': '2025-10-01 04:36:53.548638', 'step': 19481, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:53.580384', 'step': 19481, 'epoch': 3} {'type': 'loss', 'content': 0.0342484712600708, 'timestamp': '2025-10-01 04:36:53.582596', 'step': 19482, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:53.614541', 'step': 19482, 'epoch': 3} {'type': 'loss', 'content': 0.06942547112703323, 'timestamp': '2025-10-01 04:36:53.617111', 'step': 19483, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:53.648197', 'step': 19483, 'epoch': 3} {'type': 'loss', 'content': 0.0898844376206398, 'timestamp': '2025-10-01 04:36:53.671982', 'step': 19484, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:53.703929', 'step': 19484, 'epoch': 3} {'type': 'loss', 'content': 0.10690692067146301, 'timestamp': '2025-10-01 04:36:53.709826', 'step': 19485, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:53.741917', 'step': 19485, 'epoch': 3} {'type': 'loss', 'content': 0.048039842396974564, 'timestamp': '2025-10-01 04:36:53.756779', 'step': 19486, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:53.787277', 'step': 19486, 'epoch': 3} {'type': 'loss', 'content': 0.08095240592956543, 'timestamp': '2025-10-01 04:36:53.789595', 'step': 19487, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:53.820254', 'step': 19487, 'epoch': 3} {'type': 'loss', 'content': 0.038065697997808456, 'timestamp': '2025-10-01 04:36:53.844288', 'step': 19488, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:53.877024', 'step': 19488, 'epoch': 3} {'type': 'loss', 'content': 0.04088759794831276, 'timestamp': '2025-10-01 04:36:53.879359', 'step': 19489, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:53.911231', 'step': 19489, 'epoch': 3} {'type': 'loss', 'content': 0.11091268807649612, 'timestamp': '2025-10-01 04:36:53.913569', 'step': 19490, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:53.945110', 'step': 19490, 'epoch': 3} {'type': 'loss', 'content': 0.020574772730469704, 'timestamp': '2025-10-01 04:36:53.947506', 'step': 19491, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:53.980131', 'step': 19491, 'epoch': 3} {'type': 'loss', 'content': 0.0669979602098465, 'timestamp': '2025-10-01 04:36:54.003839', 'step': 19492, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:36:54.035451', 'step': 19492, 'epoch': 3} {'type': 'loss', 'content': 0.12919911742210388, 'timestamp': '2025-10-01 04:36:54.037669', 'step': 19493, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:36:54.070950', 'step': 19493, 'epoch': 3} {'type': 'loss', 'content': 0.07165364921092987, 'timestamp': '2025-10-01 04:36:54.073316', 'step': 19494, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:54.115338', 'step': 19494, 'epoch': 3} {'type': 'loss', 'content': 0.05999104306101799, 'timestamp': '2025-10-01 04:36:54.117661', 'step': 19495, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:54.148013', 'step': 19495, 'epoch': 3} {'type': 'loss', 'content': 0.03260092809796333, 'timestamp': '2025-10-01 04:36:54.171876', 'step': 19496, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:36:54.204264', 'step': 19496, 'epoch': 3} {'type': 'loss', 'content': 0.0571124367415905, 'timestamp': '2025-10-01 04:36:54.206588', 'step': 19497, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:54.236970', 'step': 19497, 'epoch': 3} {'type': 'loss', 'content': 0.07034388184547424, 'timestamp': '2025-10-01 04:36:54.239516', 'step': 19498, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:36:54.270667', 'step': 19498, 'epoch': 3} {'type': 'loss', 'content': 0.02055400237441063, 'timestamp': '2025-10-01 04:36:54.273107', 'step': 19499, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:54.304229', 'step': 19499, 'epoch': 3} {'type': 'loss', 'content': 0.05634955316781998, 'timestamp': '2025-10-01 04:36:54.327894', 'step': 19500, 'epoch': 3} {'type': 'info', 'content': 'Checkpoint saved at step 19500', 'timestamp': '2025-10-01 04:36:59.667873', 'step': 19500, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:36:59.716468', 'step': 19500, 'epoch': 3} {'type': 'loss', 'content': 0.0348588190972805, 'timestamp': '2025-10-01 04:36:59.718846', 'step': 19501, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:36:59.756010', 'step': 19501, 'epoch': 3} {'type': 'loss', 'content': 0.10396799445152283, 'timestamp': '2025-10-01 04:36:59.758372', 'step': 19502, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:36:59.794519', 'step': 19502, 'epoch': 3} {'type': 'loss', 'content': 0.17436441779136658, 'timestamp': '2025-10-01 04:36:59.796871', 'step': 19503, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:36:59.846863', 'step': 19503, 'epoch': 3} {'type': 'loss', 'content': 0.029055600985884666, 'timestamp': '2025-10-01 04:36:59.870710', 'step': 19504, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:59.906977', 'step': 19504, 'epoch': 3} {'type': 'loss', 'content': 0.09596740454435349, 'timestamp': '2025-10-01 04:36:59.910662', 'step': 19505, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:59.944114', 'step': 19505, 'epoch': 3} {'type': 'loss', 'content': 0.04871140420436859, 'timestamp': '2025-10-01 04:36:59.947640', 'step': 19506, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:36:59.995043', 'step': 19506, 'epoch': 3} {'type': 'loss', 'content': 0.025582993403077126, 'timestamp': '2025-10-01 04:36:59.997042', 'step': 19507, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:00.041579', 'step': 19507, 'epoch': 3} {'type': 'loss', 'content': 0.06765371561050415, 'timestamp': '2025-10-01 04:37:00.067274', 'step': 19508, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:37:00.106828', 'step': 19508, 'epoch': 3} {'type': 'loss', 'content': 0.06308066099882126, 'timestamp': '2025-10-01 04:37:00.108959', 'step': 19509, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:37:00.142015', 'step': 19509, 'epoch': 3} {'type': 'loss', 'content': 0.10135535150766373, 'timestamp': '2025-10-01 04:37:00.144445', 'step': 19510, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:00.178804', 'step': 19510, 'epoch': 3} {'type': 'loss', 'content': 0.018699243664741516, 'timestamp': '2025-10-01 04:37:00.181173', 'step': 19511, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:37:00.216236', 'step': 19511, 'epoch': 3} {'type': 'loss', 'content': 0.016850978136062622, 'timestamp': '2025-10-01 04:37:00.241356', 'step': 19512, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:00.276261', 'step': 19512, 'epoch': 3} {'type': 'loss', 'content': 0.04254172742366791, 'timestamp': '2025-10-01 04:37:00.278457', 'step': 19513, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:37:00.317136', 'step': 19513, 'epoch': 3} {'type': 'loss', 'content': 0.13234370946884155, 'timestamp': '2025-10-01 04:37:00.319408', 'step': 19514, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:37:00.358646', 'step': 19514, 'epoch': 3} {'type': 'loss', 'content': 0.06252779066562653, 'timestamp': '2025-10-01 04:37:00.360929', 'step': 19515, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:37:00.394047', 'step': 19515, 'epoch': 3} {'type': 'loss', 'content': 0.040362078696489334, 'timestamp': '2025-10-01 04:37:00.417832', 'step': 19516, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:37:00.453435', 'step': 19516, 'epoch': 3} {'type': 'loss', 'content': 0.02187558077275753, 'timestamp': '2025-10-01 04:37:00.456241', 'step': 19517, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:37:00.490987', 'step': 19517, 'epoch': 3} {'type': 'loss', 'content': 0.03923580050468445, 'timestamp': '2025-10-01 04:37:00.498698', 'step': 19518, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:37:00.532456', 'step': 19518, 'epoch': 3} {'type': 'loss', 'content': 0.06844272464513779, 'timestamp': '2025-10-01 04:37:00.535128', 'step': 19519, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:37:00.572961', 'step': 19519, 'epoch': 3} {'type': 'loss', 'content': 0.11237487196922302, 'timestamp': '2025-10-01 04:37:00.611347', 'step': 19520, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:00.647580', 'step': 19520, 'epoch': 3} {'type': 'loss', 'content': 0.03685364872217178, 'timestamp': '2025-10-01 04:37:00.649780', 'step': 19521, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:00.685039', 'step': 19521, 'epoch': 3} {'type': 'loss', 'content': 0.043800946325063705, 'timestamp': '2025-10-01 04:37:00.687163', 'step': 19522, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:37:00.725720', 'step': 19522, 'epoch': 3} {'type': 'loss', 'content': 0.09204670041799545, 'timestamp': '2025-10-01 04:37:00.728636', 'step': 19523, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:00.763241', 'step': 19523, 'epoch': 3} {'type': 'loss', 'content': 0.07517767697572708, 'timestamp': '2025-10-01 04:37:00.786880', 'step': 19524, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:00.826123', 'step': 19524, 'epoch': 3} {'type': 'loss', 'content': 0.04210585355758667, 'timestamp': '2025-10-01 04:37:00.828472', 'step': 19525, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:00.870528', 'step': 19525, 'epoch': 3} {'type': 'loss', 'content': 0.06148749962449074, 'timestamp': '2025-10-01 04:37:00.872911', 'step': 19526, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:00.906680', 'step': 19526, 'epoch': 3} {'type': 'loss', 'content': 0.051896341145038605, 'timestamp': '2025-10-01 04:37:00.924990', 'step': 19527, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:00.966831', 'step': 19527, 'epoch': 3} {'type': 'loss', 'content': 0.050001297146081924, 'timestamp': '2025-10-01 04:37:00.990695', 'step': 19528, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:01.037961', 'step': 19528, 'epoch': 3} {'type': 'loss', 'content': 0.020079731941223145, 'timestamp': '2025-10-01 04:37:01.040268', 'step': 19529, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:01.077484', 'step': 19529, 'epoch': 3} {'type': 'loss', 'content': 0.10720638930797577, 'timestamp': '2025-10-01 04:37:01.079993', 'step': 19530, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:01.113918', 'step': 19530, 'epoch': 3} {'type': 'loss', 'content': 0.07096201181411743, 'timestamp': '2025-10-01 04:37:01.116262', 'step': 19531, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:01.150090', 'step': 19531, 'epoch': 3} {'type': 'loss', 'content': 0.10717857629060745, 'timestamp': '2025-10-01 04:37:01.174441', 'step': 19532, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:01.205862', 'step': 19532, 'epoch': 3} {'type': 'loss', 'content': 0.11631527543067932, 'timestamp': '2025-10-01 04:37:01.211536', 'step': 19533, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:01.246612', 'step': 19533, 'epoch': 3} {'type': 'loss', 'content': 0.05307496711611748, 'timestamp': '2025-10-01 04:37:01.248800', 'step': 19534, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:37:01.285164', 'step': 19534, 'epoch': 3} {'type': 'loss', 'content': 0.044733770191669464, 'timestamp': '2025-10-01 04:37:01.287454', 'step': 19535, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:01.317992', 'step': 19535, 'epoch': 3} {'type': 'loss', 'content': 0.034635670483112335, 'timestamp': '2025-10-01 04:37:01.342495', 'step': 19536, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:37:01.374540', 'step': 19536, 'epoch': 3} {'type': 'loss', 'content': 0.030465202406048775, 'timestamp': '2025-10-01 04:37:01.376769', 'step': 19537, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:37:01.407318', 'step': 19537, 'epoch': 3} {'type': 'loss', 'content': 0.02986033819615841, 'timestamp': '2025-10-01 04:37:01.412100', 'step': 19538, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:37:01.444534', 'step': 19538, 'epoch': 3} {'type': 'loss', 'content': 0.04092998430132866, 'timestamp': '2025-10-01 04:37:01.446891', 'step': 19539, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:01.480466', 'step': 19539, 'epoch': 3} {'type': 'loss', 'content': 0.12500295042991638, 'timestamp': '2025-10-01 04:37:01.507380', 'step': 19540, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:01.539217', 'step': 19540, 'epoch': 3} {'type': 'loss', 'content': 0.04389403760433197, 'timestamp': '2025-10-01 04:37:01.541399', 'step': 19541, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:37:01.574550', 'step': 19541, 'epoch': 3} {'type': 'loss', 'content': 0.055179815739393234, 'timestamp': '2025-10-01 04:37:01.580753', 'step': 19542, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:01.612336', 'step': 19542, 'epoch': 3} {'type': 'loss', 'content': 0.05634477362036705, 'timestamp': '2025-10-01 04:37:01.614513', 'step': 19543, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:01.653846', 'step': 19543, 'epoch': 3} {'type': 'loss', 'content': 0.05911184474825859, 'timestamp': '2025-10-01 04:37:01.677807', 'step': 19544, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:37:01.711041', 'step': 19544, 'epoch': 3} {'type': 'loss', 'content': 0.13020020723342896, 'timestamp': '2025-10-01 04:37:01.720153', 'step': 19545, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:01.753552', 'step': 19545, 'epoch': 3} {'type': 'loss', 'content': 0.126454159617424, 'timestamp': '2025-10-01 04:37:01.756959', 'step': 19546, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:37:01.803154', 'step': 19546, 'epoch': 3} {'type': 'loss', 'content': 0.043742936104536057, 'timestamp': '2025-10-01 04:37:01.805119', 'step': 19547, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:01.837270', 'step': 19547, 'epoch': 3} {'type': 'loss', 'content': 0.014803904108703136, 'timestamp': '2025-10-01 04:37:01.861415', 'step': 19548, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:37:01.904510', 'step': 19548, 'epoch': 3} {'type': 'loss', 'content': 0.06682198494672775, 'timestamp': '2025-10-01 04:37:01.907225', 'step': 19549, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:01.945241', 'step': 19549, 'epoch': 3} {'type': 'loss', 'content': 0.08352220803499222, 'timestamp': '2025-10-01 04:37:01.948088', 'step': 19550, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:37:01.996218', 'step': 19550, 'epoch': 3} {'type': 'loss', 'content': 0.04370493441820145, 'timestamp': '2025-10-01 04:37:01.998702', 'step': 19551, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:37:02.040865', 'step': 19551, 'epoch': 3} {'type': 'loss', 'content': 0.060682687908411026, 'timestamp': '2025-10-01 04:37:02.064896', 'step': 19552, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:02.096103', 'step': 19552, 'epoch': 3} {'type': 'loss', 'content': 0.08370465785264969, 'timestamp': '2025-10-01 04:37:02.098345', 'step': 19553, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:37:02.131933', 'step': 19553, 'epoch': 3} {'type': 'loss', 'content': 0.031150978058576584, 'timestamp': '2025-10-01 04:37:02.134637', 'step': 19554, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:37:02.178447', 'step': 19554, 'epoch': 3} {'type': 'loss', 'content': 0.02488521859049797, 'timestamp': '2025-10-01 04:37:02.184580', 'step': 19555, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:02.240032', 'step': 19555, 'epoch': 3} {'type': 'loss', 'content': 0.04597584903240204, 'timestamp': '2025-10-01 04:37:02.279898', 'step': 19556, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:02.312532', 'step': 19556, 'epoch': 3} {'type': 'loss', 'content': 0.13283966481685638, 'timestamp': '2025-10-01 04:37:02.315127', 'step': 19557, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:37:02.349309', 'step': 19557, 'epoch': 3} {'type': 'loss', 'content': 0.10689368844032288, 'timestamp': '2025-10-01 04:37:02.351619', 'step': 19558, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:37:02.391083', 'step': 19558, 'epoch': 3} {'type': 'loss', 'content': 0.16386401653289795, 'timestamp': '2025-10-01 04:37:02.393578', 'step': 19559, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:02.450687', 'step': 19559, 'epoch': 3} {'type': 'loss', 'content': 0.14198985695838928, 'timestamp': '2025-10-01 04:37:02.476461', 'step': 19560, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:37:02.518337', 'step': 19560, 'epoch': 3} {'type': 'loss', 'content': 0.06158997118473053, 'timestamp': '2025-10-01 04:37:02.520837', 'step': 19561, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:37:02.553415', 'step': 19561, 'epoch': 3} {'type': 'loss', 'content': 0.03993747755885124, 'timestamp': '2025-10-01 04:37:02.555976', 'step': 19562, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:37:02.592038', 'step': 19562, 'epoch': 3} {'type': 'loss', 'content': 0.03364146873354912, 'timestamp': '2025-10-01 04:37:02.594198', 'step': 19563, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:02.638582', 'step': 19563, 'epoch': 3} {'type': 'loss', 'content': 0.0677991658449173, 'timestamp': '2025-10-01 04:37:02.662432', 'step': 19564, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:02.705421', 'step': 19564, 'epoch': 3} {'type': 'loss', 'content': 0.07296952605247498, 'timestamp': '2025-10-01 04:37:02.707493', 'step': 19565, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:02.757001', 'step': 19565, 'epoch': 3} {'type': 'loss', 'content': 0.054428957402706146, 'timestamp': '2025-10-01 04:37:02.762738', 'step': 19566, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:37:02.807131', 'step': 19566, 'epoch': 3} {'type': 'loss', 'content': 0.09752574563026428, 'timestamp': '2025-10-01 04:37:02.809629', 'step': 19567, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:37:02.843430', 'step': 19567, 'epoch': 3} {'type': 'loss', 'content': 0.06048610061407089, 'timestamp': '2025-10-01 04:37:02.867295', 'step': 19568, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:37:02.901366', 'step': 19568, 'epoch': 3} {'type': 'loss', 'content': 0.008462862111628056, 'timestamp': '2025-10-01 04:37:02.904804', 'step': 19569, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:02.943455', 'step': 19569, 'epoch': 3} {'type': 'loss', 'content': 0.06292201578617096, 'timestamp': '2025-10-01 04:37:02.945698', 'step': 19570, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:02.978176', 'step': 19570, 'epoch': 3} {'type': 'loss', 'content': 0.13641983270645142, 'timestamp': '2025-10-01 04:37:02.980616', 'step': 19571, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:03.031727', 'step': 19571, 'epoch': 3} {'type': 'loss', 'content': 0.027530746534466743, 'timestamp': '2025-10-01 04:37:03.057973', 'step': 19572, 'epoch': 3} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 949202279808}], 'timestamp': '2025-10-01 04:37:14.732743', 'step': 19572, 'epoch': 3} {'type': 'pplx', 'content': 10102.480242118041, 'timestamp': '2025-10-01 04:37:14.735800', 'step': 19572, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:37:14.766519', 'step': 19572, 'epoch': 3} {'type': 'loss', 'content': 0.0648387223482132, 'timestamp': '2025-10-01 04:37:14.768867', 'step': 19573, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:14.802968', 'step': 19573, 'epoch': 3} {'type': 'loss', 'content': 0.11157147586345673, 'timestamp': '2025-10-01 04:37:14.805344', 'step': 19574, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:37:14.844616', 'step': 19574, 'epoch': 3} {'type': 'loss', 'content': 0.09848232567310333, 'timestamp': '2025-10-01 04:37:14.846832', 'step': 19575, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:37:14.883694', 'step': 19575, 'epoch': 3} {'type': 'loss', 'content': 0.041207801550626755, 'timestamp': '2025-10-01 04:37:14.907885', 'step': 19576, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:14.944964', 'step': 19576, 'epoch': 3} {'type': 'loss', 'content': 0.06261494010686874, 'timestamp': '2025-10-01 04:37:14.949453', 'step': 19577, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:14.999597', 'step': 19577, 'epoch': 3} {'type': 'loss', 'content': 0.13736388087272644, 'timestamp': '2025-10-01 04:37:15.002051', 'step': 19578, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:37:15.052289', 'step': 19578, 'epoch': 3} {'type': 'loss', 'content': 0.051469068974256516, 'timestamp': '2025-10-01 04:37:15.054850', 'step': 19579, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:37:15.089919', 'step': 19579, 'epoch': 3} {'type': 'loss', 'content': 0.12367881089448929, 'timestamp': '2025-10-01 04:37:15.113875', 'step': 19580, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:15.159883', 'step': 19580, 'epoch': 3} {'type': 'loss', 'content': 0.03626171499490738, 'timestamp': '2025-10-01 04:37:15.162258', 'step': 19581, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:15.196909', 'step': 19581, 'epoch': 3} {'type': 'loss', 'content': 0.04055497422814369, 'timestamp': '2025-10-01 04:37:15.198954', 'step': 19582, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:37:15.233348', 'step': 19582, 'epoch': 3} {'type': 'loss', 'content': 0.09537787735462189, 'timestamp': '2025-10-01 04:37:15.236431', 'step': 19583, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:15.286129', 'step': 19583, 'epoch': 3} {'type': 'loss', 'content': 0.05165640264749527, 'timestamp': '2025-10-01 04:37:15.309916', 'step': 19584, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:15.347373', 'step': 19584, 'epoch': 3} {'type': 'loss', 'content': 0.05604316666722298, 'timestamp': '2025-10-01 04:37:15.349461', 'step': 19585, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:15.388510', 'step': 19585, 'epoch': 3} {'type': 'loss', 'content': 0.05615532398223877, 'timestamp': '2025-10-01 04:37:15.390622', 'step': 19586, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:37:15.425722', 'step': 19586, 'epoch': 3} {'type': 'loss', 'content': 0.026495056226849556, 'timestamp': '2025-10-01 04:37:15.428128', 'step': 19587, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:15.477848', 'step': 19587, 'epoch': 3} {'type': 'loss', 'content': 0.06910815089941025, 'timestamp': '2025-10-01 04:37:15.501345', 'step': 19588, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:37:15.564088', 'step': 19588, 'epoch': 3} {'type': 'loss', 'content': 0.07535824179649353, 'timestamp': '2025-10-01 04:37:15.565901', 'step': 19589, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:37:15.601190', 'step': 19589, 'epoch': 3} {'type': 'loss', 'content': 0.08026010543107986, 'timestamp': '2025-10-01 04:37:15.603535', 'step': 19590, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:15.638697', 'step': 19590, 'epoch': 3} {'type': 'loss', 'content': 0.06533102691173553, 'timestamp': '2025-10-01 04:37:15.640907', 'step': 19591, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:15.676319', 'step': 19591, 'epoch': 3} {'type': 'loss', 'content': 0.11039217561483383, 'timestamp': '2025-10-01 04:37:15.700043', 'step': 19592, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:37:15.732897', 'step': 19592, 'epoch': 3} {'type': 'loss', 'content': 0.05958482623100281, 'timestamp': '2025-10-01 04:37:15.735066', 'step': 19593, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:37:15.771406', 'step': 19593, 'epoch': 3} {'type': 'loss', 'content': 0.05482906475663185, 'timestamp': '2025-10-01 04:37:15.773371', 'step': 19594, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:37:15.808528', 'step': 19594, 'epoch': 3} {'type': 'loss', 'content': 0.12181451171636581, 'timestamp': '2025-10-01 04:37:15.811046', 'step': 19595, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:15.873684', 'step': 19595, 'epoch': 3} {'type': 'loss', 'content': 0.07476914674043655, 'timestamp': '2025-10-01 04:37:15.897453', 'step': 19596, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:15.930983', 'step': 19596, 'epoch': 3} {'type': 'loss', 'content': 0.08644966036081314, 'timestamp': '2025-10-01 04:37:15.933257', 'step': 19597, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:15.967750', 'step': 19597, 'epoch': 3} {'type': 'loss', 'content': 0.0768868625164032, 'timestamp': '2025-10-01 04:37:15.970285', 'step': 19598, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:16.037223', 'step': 19598, 'epoch': 3} {'type': 'loss', 'content': 0.13234388828277588, 'timestamp': '2025-10-01 04:37:16.042868', 'step': 19599, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:37:16.092678', 'step': 19599, 'epoch': 3} {'type': 'loss', 'content': 0.04942653700709343, 'timestamp': '2025-10-01 04:37:16.116170', 'step': 19600, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:37:16.166390', 'step': 19600, 'epoch': 3} {'type': 'loss', 'content': 0.03708821162581444, 'timestamp': '2025-10-01 04:37:16.168646', 'step': 19601, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:16.218045', 'step': 19601, 'epoch': 3} {'type': 'loss', 'content': 0.0991341769695282, 'timestamp': '2025-10-01 04:37:16.220167', 'step': 19602, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:16.254037', 'step': 19602, 'epoch': 3} {'type': 'loss', 'content': 0.012425062246620655, 'timestamp': '2025-10-01 04:37:16.269645', 'step': 19603, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:16.302610', 'step': 19603, 'epoch': 3} {'type': 'loss', 'content': 0.07688827812671661, 'timestamp': '2025-10-01 04:37:16.326250', 'step': 19604, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:37:16.361640', 'step': 19604, 'epoch': 3} {'type': 'loss', 'content': 0.06423050910234451, 'timestamp': '2025-10-01 04:37:16.365322', 'step': 19605, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:16.398992', 'step': 19605, 'epoch': 3} {'type': 'loss', 'content': 0.06241415813565254, 'timestamp': '2025-10-01 04:37:16.401758', 'step': 19606, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:37:16.450638', 'step': 19606, 'epoch': 3} {'type': 'loss', 'content': 0.05156267434358597, 'timestamp': '2025-10-01 04:37:16.453046', 'step': 19607, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:16.490326', 'step': 19607, 'epoch': 3} {'type': 'loss', 'content': 0.0863613560795784, 'timestamp': '2025-10-01 04:37:16.514032', 'step': 19608, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:16.551043', 'step': 19608, 'epoch': 3} {'type': 'loss', 'content': 0.03247683867812157, 'timestamp': '2025-10-01 04:37:16.553909', 'step': 19609, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:37:16.602535', 'step': 19609, 'epoch': 3} {'type': 'loss', 'content': 0.028008578345179558, 'timestamp': '2025-10-01 04:37:16.605042', 'step': 19610, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:16.637358', 'step': 19610, 'epoch': 3} {'type': 'loss', 'content': 0.07973381131887436, 'timestamp': '2025-10-01 04:37:16.639406', 'step': 19611, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:16.689241', 'step': 19611, 'epoch': 3} {'type': 'loss', 'content': 0.027476340532302856, 'timestamp': '2025-10-01 04:37:16.713142', 'step': 19612, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:16.782926', 'step': 19612, 'epoch': 3} {'type': 'loss', 'content': 0.04086705669760704, 'timestamp': '2025-10-01 04:37:16.785442', 'step': 19613, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:16.818989', 'step': 19613, 'epoch': 3} {'type': 'loss', 'content': 0.13128235936164856, 'timestamp': '2025-10-01 04:37:16.820852', 'step': 19614, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:16.854869', 'step': 19614, 'epoch': 3} {'type': 'loss', 'content': 0.031396038830280304, 'timestamp': '2025-10-01 04:37:16.857050', 'step': 19615, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:37:16.890830', 'step': 19615, 'epoch': 3} {'type': 'loss', 'content': 0.05241716653108597, 'timestamp': '2025-10-01 04:37:16.914694', 'step': 19616, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:37:16.979548', 'step': 19616, 'epoch': 3} {'type': 'loss', 'content': 0.029122961685061455, 'timestamp': '2025-10-01 04:37:16.981564', 'step': 19617, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:37:17.022445', 'step': 19617, 'epoch': 3} {'type': 'loss', 'content': 0.09067459404468536, 'timestamp': '2025-10-01 04:37:17.025180', 'step': 19618, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:17.060206', 'step': 19618, 'epoch': 3} {'type': 'loss', 'content': 0.07469552010297775, 'timestamp': '2025-10-01 04:37:17.063970', 'step': 19619, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:17.112466', 'step': 19619, 'epoch': 3} {'type': 'loss', 'content': 0.15123425424098969, 'timestamp': '2025-10-01 04:37:17.136147', 'step': 19620, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:17.185378', 'step': 19620, 'epoch': 3} {'type': 'loss', 'content': 0.07512610405683517, 'timestamp': '2025-10-01 04:37:17.187496', 'step': 19621, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:17.235713', 'step': 19621, 'epoch': 3} {'type': 'loss', 'content': 0.024447903037071228, 'timestamp': '2025-10-01 04:37:17.237664', 'step': 19622, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:37:17.273918', 'step': 19622, 'epoch': 3} {'type': 'loss', 'content': 0.05419115349650383, 'timestamp': '2025-10-01 04:37:17.275926', 'step': 19623, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:37:17.322122', 'step': 19623, 'epoch': 3} {'type': 'loss', 'content': 0.03404497727751732, 'timestamp': '2025-10-01 04:37:17.346079', 'step': 19624, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:17.380416', 'step': 19624, 'epoch': 3} {'type': 'loss', 'content': 0.07228291034698486, 'timestamp': '2025-10-01 04:37:17.382553', 'step': 19625, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:37:17.417529', 'step': 19625, 'epoch': 3} {'type': 'loss', 'content': 0.045918870717287064, 'timestamp': '2025-10-01 04:37:17.420127', 'step': 19626, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:17.455056', 'step': 19626, 'epoch': 3} {'type': 'loss', 'content': 0.02195633575320244, 'timestamp': '2025-10-01 04:37:17.457499', 'step': 19627, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:17.502087', 'step': 19627, 'epoch': 3} {'type': 'loss', 'content': 0.07299685478210449, 'timestamp': '2025-10-01 04:37:17.525898', 'step': 19628, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:37:17.559774', 'step': 19628, 'epoch': 3} {'type': 'loss', 'content': 0.03135932609438896, 'timestamp': '2025-10-01 04:37:17.561749', 'step': 19629, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:17.620245', 'step': 19629, 'epoch': 3} {'type': 'loss', 'content': 0.06133504956960678, 'timestamp': '2025-10-01 04:37:17.624626', 'step': 19630, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:37:17.675615', 'step': 19630, 'epoch': 3} {'type': 'loss', 'content': 0.0983167439699173, 'timestamp': '2025-10-01 04:37:17.677845', 'step': 19631, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:17.712008', 'step': 19631, 'epoch': 3} {'type': 'loss', 'content': 0.07062715291976929, 'timestamp': '2025-10-01 04:37:17.735620', 'step': 19632, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:17.784183', 'step': 19632, 'epoch': 3} {'type': 'loss', 'content': 0.14429907500743866, 'timestamp': '2025-10-01 04:37:17.786872', 'step': 19633, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:37:17.840367', 'step': 19633, 'epoch': 3} {'type': 'loss', 'content': 0.20947763323783875, 'timestamp': '2025-10-01 04:37:17.844438', 'step': 19634, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:37:17.882100', 'step': 19634, 'epoch': 3} {'type': 'loss', 'content': 0.16974890232086182, 'timestamp': '2025-10-01 04:37:17.886282', 'step': 19635, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:17.922729', 'step': 19635, 'epoch': 3} {'type': 'loss', 'content': 0.11916507035493851, 'timestamp': '2025-10-01 04:37:17.947124', 'step': 19636, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:17.990737', 'step': 19636, 'epoch': 3} {'type': 'loss', 'content': 0.0865323469042778, 'timestamp': '2025-10-01 04:37:17.992918', 'step': 19637, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:18.030576', 'step': 19637, 'epoch': 3} {'type': 'loss', 'content': 0.04661110043525696, 'timestamp': '2025-10-01 04:37:18.033005', 'step': 19638, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:37:18.067868', 'step': 19638, 'epoch': 3} {'type': 'loss', 'content': 0.08793432265520096, 'timestamp': '2025-10-01 04:37:18.070524', 'step': 19639, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:18.108932', 'step': 19639, 'epoch': 3} {'type': 'loss', 'content': 0.01852516271173954, 'timestamp': '2025-10-01 04:37:18.132658', 'step': 19640, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:18.180589', 'step': 19640, 'epoch': 3} {'type': 'loss', 'content': 0.14140217006206512, 'timestamp': '2025-10-01 04:37:18.182925', 'step': 19641, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:37:18.223279', 'step': 19641, 'epoch': 3} {'type': 'loss', 'content': 0.04399651288986206, 'timestamp': '2025-10-01 04:37:18.225770', 'step': 19642, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:18.261675', 'step': 19642, 'epoch': 3} {'type': 'loss', 'content': 0.03735143691301346, 'timestamp': '2025-10-01 04:37:18.264021', 'step': 19643, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:18.311963', 'step': 19643, 'epoch': 3} {'type': 'loss', 'content': 0.018919197842478752, 'timestamp': '2025-10-01 04:37:18.335650', 'step': 19644, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:18.384801', 'step': 19644, 'epoch': 3} {'type': 'loss', 'content': 0.03668023645877838, 'timestamp': '2025-10-01 04:37:18.390180', 'step': 19645, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:18.454907', 'step': 19645, 'epoch': 3} {'type': 'loss', 'content': 0.14409475028514862, 'timestamp': '2025-10-01 04:37:18.457246', 'step': 19646, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:18.506300', 'step': 19646, 'epoch': 3} {'type': 'loss', 'content': 0.11824557930231094, 'timestamp': '2025-10-01 04:37:18.509812', 'step': 19647, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:37:18.544164', 'step': 19647, 'epoch': 3} {'type': 'loss', 'content': 0.17677214741706848, 'timestamp': '2025-10-01 04:37:18.567659', 'step': 19648, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:18.619452', 'step': 19648, 'epoch': 3} {'type': 'loss', 'content': 0.03217922896146774, 'timestamp': '2025-10-01 04:37:18.621605', 'step': 19649, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:18.657919', 'step': 19649, 'epoch': 3} {'type': 'loss', 'content': 0.07926501333713531, 'timestamp': '2025-10-01 04:37:18.661161', 'step': 19650, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:18.708646', 'step': 19650, 'epoch': 3} {'type': 'loss', 'content': 0.08282296359539032, 'timestamp': '2025-10-01 04:37:18.710747', 'step': 19651, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:18.746598', 'step': 19651, 'epoch': 3} {'type': 'loss', 'content': 0.03909079730510712, 'timestamp': '2025-10-01 04:37:18.770392', 'step': 19652, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:18.804155', 'step': 19652, 'epoch': 3} {'type': 'loss', 'content': 0.0782620757818222, 'timestamp': '2025-10-01 04:37:18.806312', 'step': 19653, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:18.842810', 'step': 19653, 'epoch': 3} {'type': 'loss', 'content': 0.03535056114196777, 'timestamp': '2025-10-01 04:37:18.845052', 'step': 19654, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:37:18.878216', 'step': 19654, 'epoch': 3} {'type': 'loss', 'content': 0.12785957753658295, 'timestamp': '2025-10-01 04:37:18.880978', 'step': 19655, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:18.915495', 'step': 19655, 'epoch': 3} {'type': 'loss', 'content': 0.06061641871929169, 'timestamp': '2025-10-01 04:37:18.939361', 'step': 19656, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:18.971699', 'step': 19656, 'epoch': 3} {'type': 'loss', 'content': 0.026512598618865013, 'timestamp': '2025-10-01 04:37:18.973798', 'step': 19657, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:37:19.007905', 'step': 19657, 'epoch': 3} {'type': 'loss', 'content': 0.044106390327215195, 'timestamp': '2025-10-01 04:37:19.009797', 'step': 19658, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:19.042485', 'step': 19658, 'epoch': 3} {'type': 'loss', 'content': 0.019894743338227272, 'timestamp': '2025-10-01 04:37:19.044645', 'step': 19659, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:19.095374', 'step': 19659, 'epoch': 3} {'type': 'loss', 'content': 0.06423131376504898, 'timestamp': '2025-10-01 04:37:19.119761', 'step': 19660, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:19.157158', 'step': 19660, 'epoch': 3} {'type': 'loss', 'content': 0.10856842249631882, 'timestamp': '2025-10-01 04:37:19.160000', 'step': 19661, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:19.194175', 'step': 19661, 'epoch': 3} {'type': 'loss', 'content': 0.03490970656275749, 'timestamp': '2025-10-01 04:37:19.196390', 'step': 19662, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:19.243511', 'step': 19662, 'epoch': 3} {'type': 'loss', 'content': 0.06499451398849487, 'timestamp': '2025-10-01 04:37:19.245598', 'step': 19663, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:37:19.279558', 'step': 19663, 'epoch': 3} {'type': 'loss', 'content': 0.020791364833712578, 'timestamp': '2025-10-01 04:37:19.303128', 'step': 19664, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:37:19.338024', 'step': 19664, 'epoch': 3} {'type': 'loss', 'content': 0.030461058020591736, 'timestamp': '2025-10-01 04:37:19.340153', 'step': 19665, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:19.387325', 'step': 19665, 'epoch': 3} {'type': 'loss', 'content': 0.12508195638656616, 'timestamp': '2025-10-01 04:37:19.389483', 'step': 19666, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:19.440552', 'step': 19666, 'epoch': 3} {'type': 'loss', 'content': 0.06216917186975479, 'timestamp': '2025-10-01 04:37:19.442516', 'step': 19667, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:19.477235', 'step': 19667, 'epoch': 3} {'type': 'loss', 'content': 0.020494362339377403, 'timestamp': '2025-10-01 04:37:19.500796', 'step': 19668, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:37:19.566060', 'step': 19668, 'epoch': 3} {'type': 'loss', 'content': 0.057499535381793976, 'timestamp': '2025-10-01 04:37:19.568404', 'step': 19669, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:19.633875', 'step': 19669, 'epoch': 3} {'type': 'loss', 'content': 0.03316130489110947, 'timestamp': '2025-10-01 04:37:19.636190', 'step': 19670, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:37:19.682263', 'step': 19670, 'epoch': 3} {'type': 'loss', 'content': 0.14306919276714325, 'timestamp': '2025-10-01 04:37:19.684350', 'step': 19671, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:37:19.749207', 'step': 19671, 'epoch': 3} {'type': 'loss', 'content': 0.1284685730934143, 'timestamp': '2025-10-01 04:37:19.772788', 'step': 19672, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:19.826226', 'step': 19672, 'epoch': 3} {'type': 'loss', 'content': 0.10685515403747559, 'timestamp': '2025-10-01 04:37:19.828315', 'step': 19673, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:19.888920', 'step': 19673, 'epoch': 3} {'type': 'loss', 'content': 0.11067841202020645, 'timestamp': '2025-10-01 04:37:19.891075', 'step': 19674, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:19.947070', 'step': 19674, 'epoch': 3} {'type': 'loss', 'content': 0.09510210156440735, 'timestamp': '2025-10-01 04:37:19.949032', 'step': 19675, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:20.002203', 'step': 19675, 'epoch': 3} {'type': 'loss', 'content': 0.11127447336912155, 'timestamp': '2025-10-01 04:37:20.025818', 'step': 19676, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:20.095629', 'step': 19676, 'epoch': 3} {'type': 'loss', 'content': 0.05711054056882858, 'timestamp': '2025-10-01 04:37:20.100469', 'step': 19677, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:37:20.162772', 'step': 19677, 'epoch': 3} {'type': 'loss', 'content': 0.09195654839277267, 'timestamp': '2025-10-01 04:37:20.166254', 'step': 19678, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:37:20.229833', 'step': 19678, 'epoch': 3} {'type': 'loss', 'content': 0.0590834841132164, 'timestamp': '2025-10-01 04:37:20.231880', 'step': 19679, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:20.285330', 'step': 19679, 'epoch': 3} {'type': 'loss', 'content': 0.06214200332760811, 'timestamp': '2025-10-01 04:37:20.309068', 'step': 19680, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:20.422074', 'step': 19680, 'epoch': 3} {'type': 'loss', 'content': 0.08548889309167862, 'timestamp': '2025-10-01 04:37:20.427381', 'step': 19681, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:20.524187', 'step': 19681, 'epoch': 3} {'type': 'loss', 'content': 0.03099045716226101, 'timestamp': '2025-10-01 04:37:20.526741', 'step': 19682, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:20.599354', 'step': 19682, 'epoch': 3} {'type': 'loss', 'content': 0.029236232861876488, 'timestamp': '2025-10-01 04:37:20.603687', 'step': 19683, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:37:20.651809', 'step': 19683, 'epoch': 3} {'type': 'loss', 'content': 0.06387640535831451, 'timestamp': '2025-10-01 04:37:20.677066', 'step': 19684, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:20.728599', 'step': 19684, 'epoch': 3} {'type': 'loss', 'content': 0.09790544956922531, 'timestamp': '2025-10-01 04:37:20.731124', 'step': 19685, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:37:20.771983', 'step': 19685, 'epoch': 3} {'type': 'loss', 'content': 0.11041539907455444, 'timestamp': '2025-10-01 04:37:20.774431', 'step': 19686, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:20.810267', 'step': 19686, 'epoch': 3} {'type': 'loss', 'content': 0.022218093276023865, 'timestamp': '2025-10-01 04:37:20.812360', 'step': 19687, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:37:20.883408', 'step': 19687, 'epoch': 3} {'type': 'loss', 'content': 0.09904345870018005, 'timestamp': '2025-10-01 04:37:20.907056', 'step': 19688, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:20.953114', 'step': 19688, 'epoch': 3} {'type': 'loss', 'content': 0.0726810172200203, 'timestamp': '2025-10-01 04:37:20.956136', 'step': 19689, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:21.002683', 'step': 19689, 'epoch': 3} {'type': 'loss', 'content': 0.10678064823150635, 'timestamp': '2025-10-01 04:37:21.005274', 'step': 19690, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:21.050969', 'step': 19690, 'epoch': 3} {'type': 'loss', 'content': 0.04356919974088669, 'timestamp': '2025-10-01 04:37:21.054619', 'step': 19691, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:21.133283', 'step': 19691, 'epoch': 3} {'type': 'loss', 'content': 0.03970922529697418, 'timestamp': '2025-10-01 04:37:21.156985', 'step': 19692, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:21.217355', 'step': 19692, 'epoch': 3} {'type': 'loss', 'content': 0.1034051775932312, 'timestamp': '2025-10-01 04:37:21.220286', 'step': 19693, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:37:21.285293', 'step': 19693, 'epoch': 3} {'type': 'loss', 'content': 0.12263928353786469, 'timestamp': '2025-10-01 04:37:21.288996', 'step': 19694, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:37:21.330702', 'step': 19694, 'epoch': 3} {'type': 'loss', 'content': 0.0986671894788742, 'timestamp': '2025-10-01 04:37:21.332804', 'step': 19695, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:21.378489', 'step': 19695, 'epoch': 3} {'type': 'loss', 'content': 0.03344028443098068, 'timestamp': '2025-10-01 04:37:21.403991', 'step': 19696, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:37:21.452797', 'step': 19696, 'epoch': 3} {'type': 'loss', 'content': 0.06927962601184845, 'timestamp': '2025-10-01 04:37:21.454844', 'step': 19697, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:21.526027', 'step': 19697, 'epoch': 3} {'type': 'loss', 'content': 0.03383227810263634, 'timestamp': '2025-10-01 04:37:21.528512', 'step': 19698, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:21.597923', 'step': 19698, 'epoch': 3} {'type': 'loss', 'content': 0.08526269346475601, 'timestamp': '2025-10-01 04:37:21.600244', 'step': 19699, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:21.647624', 'step': 19699, 'epoch': 3} {'type': 'loss', 'content': 0.014247044920921326, 'timestamp': '2025-10-01 04:37:21.671239', 'step': 19700, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:37:21.730071', 'step': 19700, 'epoch': 3} {'type': 'loss', 'content': 0.13780978322029114, 'timestamp': '2025-10-01 04:37:21.732754', 'step': 19701, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:21.771451', 'step': 19701, 'epoch': 3} {'type': 'loss', 'content': 0.10476616770029068, 'timestamp': '2025-10-01 04:37:21.773514', 'step': 19702, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:21.815905', 'step': 19702, 'epoch': 3} {'type': 'loss', 'content': 0.06422863155603409, 'timestamp': '2025-10-01 04:37:21.818446', 'step': 19703, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:21.878336', 'step': 19703, 'epoch': 3} {'type': 'loss', 'content': 0.14372311532497406, 'timestamp': '2025-10-01 04:37:21.903453', 'step': 19704, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:21.955076', 'step': 19704, 'epoch': 3} {'type': 'loss', 'content': 0.06153326854109764, 'timestamp': '2025-10-01 04:37:21.957740', 'step': 19705, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:22.024865', 'step': 19705, 'epoch': 3} {'type': 'loss', 'content': 0.049360018223524094, 'timestamp': '2025-10-01 04:37:22.028225', 'step': 19706, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:22.067808', 'step': 19706, 'epoch': 3} {'type': 'loss', 'content': 0.02240614965558052, 'timestamp': '2025-10-01 04:37:22.069780', 'step': 19707, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:22.137432', 'step': 19707, 'epoch': 3} {'type': 'loss', 'content': 0.028899189084768295, 'timestamp': '2025-10-01 04:37:22.160966', 'step': 19708, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:37:22.218714', 'step': 19708, 'epoch': 3} {'type': 'loss', 'content': 0.03949110209941864, 'timestamp': '2025-10-01 04:37:22.220955', 'step': 19709, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:22.256229', 'step': 19709, 'epoch': 3} {'type': 'loss', 'content': 0.07808037102222443, 'timestamp': '2025-10-01 04:37:22.258656', 'step': 19710, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:22.296205', 'step': 19710, 'epoch': 3} {'type': 'loss', 'content': 0.052101150155067444, 'timestamp': '2025-10-01 04:37:22.302034', 'step': 19711, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:37:22.337080', 'step': 19711, 'epoch': 3} {'type': 'loss', 'content': 0.13769397139549255, 'timestamp': '2025-10-01 04:37:22.360742', 'step': 19712, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:22.410338', 'step': 19712, 'epoch': 3} {'type': 'loss', 'content': 0.14220106601715088, 'timestamp': '2025-10-01 04:37:22.412717', 'step': 19713, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:22.447794', 'step': 19713, 'epoch': 3} {'type': 'loss', 'content': 0.07144639641046524, 'timestamp': '2025-10-01 04:37:22.450542', 'step': 19714, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:37:22.486824', 'step': 19714, 'epoch': 3} {'type': 'loss', 'content': 0.05168166756629944, 'timestamp': '2025-10-01 04:37:22.490921', 'step': 19715, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:22.540590', 'step': 19715, 'epoch': 3} {'type': 'loss', 'content': 0.04468139261007309, 'timestamp': '2025-10-01 04:37:22.564625', 'step': 19716, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:37:22.600889', 'step': 19716, 'epoch': 3} {'type': 'loss', 'content': 0.11584395170211792, 'timestamp': '2025-10-01 04:37:22.603593', 'step': 19717, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:22.637593', 'step': 19717, 'epoch': 3} {'type': 'loss', 'content': 0.12629413604736328, 'timestamp': '2025-10-01 04:37:22.639841', 'step': 19718, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:22.672753', 'step': 19718, 'epoch': 3} {'type': 'loss', 'content': 0.05134234204888344, 'timestamp': '2025-10-01 04:37:22.676069', 'step': 19719, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:22.729914', 'step': 19719, 'epoch': 3} {'type': 'loss', 'content': 0.06629166752099991, 'timestamp': '2025-10-01 04:37:22.753785', 'step': 19720, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:37:22.790437', 'step': 19720, 'epoch': 3} {'type': 'loss', 'content': 0.04745645448565483, 'timestamp': '2025-10-01 04:37:22.793143', 'step': 19721, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:22.825394', 'step': 19721, 'epoch': 3} {'type': 'loss', 'content': 0.09110205620527267, 'timestamp': '2025-10-01 04:37:22.828127', 'step': 19722, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:22.865047', 'step': 19722, 'epoch': 3} {'type': 'loss', 'content': 0.08978819102048874, 'timestamp': '2025-10-01 04:37:22.869200', 'step': 19723, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:37:22.914944', 'step': 19723, 'epoch': 3} {'type': 'loss', 'content': 0.09560391306877136, 'timestamp': '2025-10-01 04:37:22.939039', 'step': 19724, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:22.989595', 'step': 19724, 'epoch': 3} {'type': 'loss', 'content': 0.1429324448108673, 'timestamp': '2025-10-01 04:37:22.992219', 'step': 19725, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:23.044589', 'step': 19725, 'epoch': 3} {'type': 'loss', 'content': 0.0665280818939209, 'timestamp': '2025-10-01 04:37:23.047327', 'step': 19726, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:23.085591', 'step': 19726, 'epoch': 3} {'type': 'loss', 'content': 0.036903612315654755, 'timestamp': '2025-10-01 04:37:23.088512', 'step': 19727, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:37:23.124814', 'step': 19727, 'epoch': 3} {'type': 'loss', 'content': 0.0704169049859047, 'timestamp': '2025-10-01 04:37:23.149109', 'step': 19728, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:37:23.185880', 'step': 19728, 'epoch': 3} {'type': 'loss', 'content': 0.052535951137542725, 'timestamp': '2025-10-01 04:37:23.190393', 'step': 19729, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:37:23.226685', 'step': 19729, 'epoch': 3} {'type': 'loss', 'content': 0.10771568864583969, 'timestamp': '2025-10-01 04:37:23.229574', 'step': 19730, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:23.265347', 'step': 19730, 'epoch': 3} {'type': 'loss', 'content': 0.025940686464309692, 'timestamp': '2025-10-01 04:37:23.268685', 'step': 19731, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:23.306335', 'step': 19731, 'epoch': 3} {'type': 'loss', 'content': 0.08254732936620712, 'timestamp': '2025-10-01 04:37:23.330545', 'step': 19732, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:23.365804', 'step': 19732, 'epoch': 3} {'type': 'loss', 'content': 0.1336660236120224, 'timestamp': '2025-10-01 04:37:23.369061', 'step': 19733, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:23.404427', 'step': 19733, 'epoch': 3} {'type': 'loss', 'content': 0.06546589732170105, 'timestamp': '2025-10-01 04:37:23.406853', 'step': 19734, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:23.443159', 'step': 19734, 'epoch': 3} {'type': 'loss', 'content': 0.05179378390312195, 'timestamp': '2025-10-01 04:37:23.445416', 'step': 19735, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:23.524038', 'step': 19735, 'epoch': 3} {'type': 'loss', 'content': 0.0508873388171196, 'timestamp': '2025-10-01 04:37:23.547949', 'step': 19736, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:23.600016', 'step': 19736, 'epoch': 3} {'type': 'loss', 'content': 0.10246533155441284, 'timestamp': '2025-10-01 04:37:23.602402', 'step': 19737, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:23.636057', 'step': 19737, 'epoch': 3} {'type': 'loss', 'content': 0.05107125639915466, 'timestamp': '2025-10-01 04:37:23.638525', 'step': 19738, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:23.671591', 'step': 19738, 'epoch': 3} {'type': 'loss', 'content': 0.02628166414797306, 'timestamp': '2025-10-01 04:37:23.674319', 'step': 19739, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:37:23.707751', 'step': 19739, 'epoch': 3} {'type': 'loss', 'content': 0.030626703053712845, 'timestamp': '2025-10-01 04:37:23.731860', 'step': 19740, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:23.765314', 'step': 19740, 'epoch': 3} {'type': 'loss', 'content': 0.038594748824834824, 'timestamp': '2025-10-01 04:37:23.767646', 'step': 19741, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:23.817646', 'step': 19741, 'epoch': 3} {'type': 'loss', 'content': 0.1267477422952652, 'timestamp': '2025-10-01 04:37:23.820520', 'step': 19742, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:37:23.855049', 'step': 19742, 'epoch': 3} {'type': 'loss', 'content': 0.0013083764351904392, 'timestamp': '2025-10-01 04:37:23.857701', 'step': 19743, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:37:23.892238', 'step': 19743, 'epoch': 3} {'type': 'loss', 'content': 0.026448046788573265, 'timestamp': '2025-10-01 04:37:23.920774', 'step': 19744, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:23.974306', 'step': 19744, 'epoch': 3} {'type': 'loss', 'content': 0.05465152487158775, 'timestamp': '2025-10-01 04:37:23.976930', 'step': 19745, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:24.011136', 'step': 19745, 'epoch': 3} {'type': 'loss', 'content': 0.03722355514764786, 'timestamp': '2025-10-01 04:37:24.013437', 'step': 19746, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:24.048139', 'step': 19746, 'epoch': 3} {'type': 'loss', 'content': 0.07113347947597504, 'timestamp': '2025-10-01 04:37:24.050564', 'step': 19747, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:24.087111', 'step': 19747, 'epoch': 3} {'type': 'loss', 'content': 0.015278995037078857, 'timestamp': '2025-10-01 04:37:24.114056', 'step': 19748, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:37:24.149795', 'step': 19748, 'epoch': 3} {'type': 'loss', 'content': 0.10665429383516312, 'timestamp': '2025-10-01 04:37:24.153801', 'step': 19749, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:24.189256', 'step': 19749, 'epoch': 3} {'type': 'loss', 'content': 0.0672425702214241, 'timestamp': '2025-10-01 04:37:24.191856', 'step': 19750, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:37:24.227427', 'step': 19750, 'epoch': 3} {'type': 'loss', 'content': 0.09056715667247772, 'timestamp': '2025-10-01 04:37:24.230404', 'step': 19751, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:24.280857', 'step': 19751, 'epoch': 3} {'type': 'loss', 'content': 0.05529259890317917, 'timestamp': '2025-10-01 04:37:24.320437', 'step': 19752, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:37:24.358173', 'step': 19752, 'epoch': 3} {'type': 'loss', 'content': 0.03470338135957718, 'timestamp': '2025-10-01 04:37:24.360330', 'step': 19753, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:37:24.396139', 'step': 19753, 'epoch': 3} {'type': 'loss', 'content': 0.08197098970413208, 'timestamp': '2025-10-01 04:37:24.398394', 'step': 19754, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:37:24.434267', 'step': 19754, 'epoch': 3} {'type': 'loss', 'content': 0.021686578169465065, 'timestamp': '2025-10-01 04:37:24.436962', 'step': 19755, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:24.502297', 'step': 19755, 'epoch': 3} {'type': 'loss', 'content': 0.10017109662294388, 'timestamp': '2025-10-01 04:37:24.526282', 'step': 19756, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:24.575755', 'step': 19756, 'epoch': 3} {'type': 'loss', 'content': 0.018409525975584984, 'timestamp': '2025-10-01 04:37:24.577990', 'step': 19757, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:24.628863', 'step': 19757, 'epoch': 3} {'type': 'loss', 'content': 0.04381858929991722, 'timestamp': '2025-10-01 04:37:24.631139', 'step': 19758, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:24.663808', 'step': 19758, 'epoch': 3} {'type': 'loss', 'content': 0.11800733208656311, 'timestamp': '2025-10-01 04:37:24.666136', 'step': 19759, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:24.729032', 'step': 19759, 'epoch': 3} {'type': 'loss', 'content': 0.049480557441711426, 'timestamp': '2025-10-01 04:37:24.752741', 'step': 19760, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:37:24.788733', 'step': 19760, 'epoch': 3} {'type': 'loss', 'content': 0.07172045111656189, 'timestamp': '2025-10-01 04:37:24.791757', 'step': 19761, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:37:24.840604', 'step': 19761, 'epoch': 3} {'type': 'loss', 'content': 0.048113953322172165, 'timestamp': '2025-10-01 04:37:24.843423', 'step': 19762, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:37:24.907910', 'step': 19762, 'epoch': 3} {'type': 'loss', 'content': 0.0761016309261322, 'timestamp': '2025-10-01 04:37:24.910305', 'step': 19763, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:37:24.961521', 'step': 19763, 'epoch': 3} {'type': 'loss', 'content': 0.054369885474443436, 'timestamp': '2025-10-01 04:37:24.985302', 'step': 19764, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:37:25.037088', 'step': 19764, 'epoch': 3} {'type': 'loss', 'content': 0.065666064620018, 'timestamp': '2025-10-01 04:37:25.039404', 'step': 19765, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:37:25.077063', 'step': 19765, 'epoch': 3} {'type': 'loss', 'content': 0.03565099090337753, 'timestamp': '2025-10-01 04:37:25.079362', 'step': 19766, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:25.129188', 'step': 19766, 'epoch': 3} {'type': 'loss', 'content': 0.0707945004105568, 'timestamp': '2025-10-01 04:37:25.131721', 'step': 19767, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:37:25.180272', 'step': 19767, 'epoch': 3} {'type': 'loss', 'content': 0.050274237990379333, 'timestamp': '2025-10-01 04:37:25.204252', 'step': 19768, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:25.252558', 'step': 19768, 'epoch': 3} {'type': 'loss', 'content': 0.09602038562297821, 'timestamp': '2025-10-01 04:37:25.254786', 'step': 19769, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:25.299191', 'step': 19769, 'epoch': 3} {'type': 'loss', 'content': 0.06493820250034332, 'timestamp': '2025-10-01 04:37:25.302607', 'step': 19770, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:25.340260', 'step': 19770, 'epoch': 3} {'type': 'loss', 'content': 0.08541861921548843, 'timestamp': '2025-10-01 04:37:25.343107', 'step': 19771, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:37:25.378388', 'step': 19771, 'epoch': 3} {'type': 'loss', 'content': 0.08681517839431763, 'timestamp': '2025-10-01 04:37:25.402339', 'step': 19772, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:25.436998', 'step': 19772, 'epoch': 3} {'type': 'loss', 'content': 0.053535666316747665, 'timestamp': '2025-10-01 04:37:25.439177', 'step': 19773, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:25.476797', 'step': 19773, 'epoch': 3} {'type': 'loss', 'content': 0.02474628947675228, 'timestamp': '2025-10-01 04:37:25.490301', 'step': 19774, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:25.528852', 'step': 19774, 'epoch': 3} {'type': 'loss', 'content': 0.07499674707651138, 'timestamp': '2025-10-01 04:37:25.531752', 'step': 19775, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:37:25.567544', 'step': 19775, 'epoch': 3} {'type': 'loss', 'content': 0.10756255686283112, 'timestamp': '2025-10-01 04:37:25.591235', 'step': 19776, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:37:25.642557', 'step': 19776, 'epoch': 3} {'type': 'loss', 'content': 0.07910197973251343, 'timestamp': '2025-10-01 04:37:25.644783', 'step': 19777, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:37:25.680381', 'step': 19777, 'epoch': 3} {'type': 'loss', 'content': 0.07573343068361282, 'timestamp': '2025-10-01 04:37:25.682891', 'step': 19778, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:25.735694', 'step': 19778, 'epoch': 3} {'type': 'loss', 'content': 0.08416248857975006, 'timestamp': '2025-10-01 04:37:25.738031', 'step': 19779, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:25.772701', 'step': 19779, 'epoch': 3} {'type': 'loss', 'content': 0.042958661913871765, 'timestamp': '2025-10-01 04:37:25.796495', 'step': 19780, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:25.831981', 'step': 19780, 'epoch': 3} {'type': 'loss', 'content': 0.05858207494020462, 'timestamp': '2025-10-01 04:37:25.835063', 'step': 19781, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:25.871803', 'step': 19781, 'epoch': 3} {'type': 'loss', 'content': 0.03689302131533623, 'timestamp': '2025-10-01 04:37:25.874595', 'step': 19782, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:37:25.938990', 'step': 19782, 'epoch': 3} {'type': 'loss', 'content': 0.03946053236722946, 'timestamp': '2025-10-01 04:37:25.941502', 'step': 19783, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:37:25.991918', 'step': 19783, 'epoch': 3} {'type': 'loss', 'content': 0.019551655277609825, 'timestamp': '2025-10-01 04:37:26.015664', 'step': 19784, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:37:26.061801', 'step': 19784, 'epoch': 3} {'type': 'loss', 'content': 0.03653633967041969, 'timestamp': '2025-10-01 04:37:26.064235', 'step': 19785, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:37:26.126235', 'step': 19785, 'epoch': 3} {'type': 'loss', 'content': 0.14725537598133087, 'timestamp': '2025-10-01 04:37:26.130640', 'step': 19786, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:26.164180', 'step': 19786, 'epoch': 3} {'type': 'loss', 'content': 0.059407491236925125, 'timestamp': '2025-10-01 04:37:26.167715', 'step': 19787, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:37:26.202097', 'step': 19787, 'epoch': 3} {'type': 'loss', 'content': 0.05274997651576996, 'timestamp': '2025-10-01 04:37:26.242250', 'step': 19788, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:26.279862', 'step': 19788, 'epoch': 3} {'type': 'loss', 'content': 0.10052862018346786, 'timestamp': '2025-10-01 04:37:26.288024', 'step': 19789, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:26.323809', 'step': 19789, 'epoch': 3} {'type': 'loss', 'content': 0.07072808593511581, 'timestamp': '2025-10-01 04:37:26.326642', 'step': 19790, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:26.360925', 'step': 19790, 'epoch': 3} {'type': 'loss', 'content': 0.02100824937224388, 'timestamp': '2025-10-01 04:37:26.363791', 'step': 19791, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:26.399635', 'step': 19791, 'epoch': 3} {'type': 'loss', 'content': 0.033874161541461945, 'timestamp': '2025-10-01 04:37:26.423432', 'step': 19792, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:26.474780', 'step': 19792, 'epoch': 3} {'type': 'loss', 'content': 0.03392958268523216, 'timestamp': '2025-10-01 04:37:26.476944', 'step': 19793, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:26.536334', 'step': 19793, 'epoch': 3} {'type': 'loss', 'content': 0.08949721604585648, 'timestamp': '2025-10-01 04:37:26.540437', 'step': 19794, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:26.579909', 'step': 19794, 'epoch': 3} {'type': 'loss', 'content': 0.058754246681928635, 'timestamp': '2025-10-01 04:37:26.584152', 'step': 19795, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:26.632681', 'step': 19795, 'epoch': 3} {'type': 'loss', 'content': 0.053100891411304474, 'timestamp': '2025-10-01 04:37:26.656450', 'step': 19796, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:37:26.692518', 'step': 19796, 'epoch': 3} {'type': 'loss', 'content': 0.057295624166727066, 'timestamp': '2025-10-01 04:37:26.694828', 'step': 19797, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:37:26.730926', 'step': 19797, 'epoch': 3} {'type': 'loss', 'content': 0.04489961639046669, 'timestamp': '2025-10-01 04:37:26.733327', 'step': 19798, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:26.767689', 'step': 19798, 'epoch': 3} {'type': 'loss', 'content': 0.08614836633205414, 'timestamp': '2025-10-01 04:37:26.770413', 'step': 19799, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:26.809718', 'step': 19799, 'epoch': 3} {'type': 'loss', 'content': 0.08020256459712982, 'timestamp': '2025-10-01 04:37:26.833657', 'step': 19800, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:26.868143', 'step': 19800, 'epoch': 3} {'type': 'loss', 'content': 0.0959244892001152, 'timestamp': '2025-10-01 04:37:26.872001', 'step': 19801, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:26.906302', 'step': 19801, 'epoch': 3} {'type': 'loss', 'content': 0.046831950545310974, 'timestamp': '2025-10-01 04:37:26.909405', 'step': 19802, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:37:26.947584', 'step': 19802, 'epoch': 3} {'type': 'loss', 'content': 0.02248689904808998, 'timestamp': '2025-10-01 04:37:26.949921', 'step': 19803, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:37:26.983396', 'step': 19803, 'epoch': 3} {'type': 'loss', 'content': 0.045117758214473724, 'timestamp': '2025-10-01 04:37:27.007297', 'step': 19804, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:37:27.040351', 'step': 19804, 'epoch': 3} {'type': 'loss', 'content': 0.06846767663955688, 'timestamp': '2025-10-01 04:37:27.042667', 'step': 19805, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:27.093368', 'step': 19805, 'epoch': 3} {'type': 'loss', 'content': 0.09491050243377686, 'timestamp': '2025-10-01 04:37:27.096449', 'step': 19806, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:27.131041', 'step': 19806, 'epoch': 3} {'type': 'loss', 'content': 0.06978745758533478, 'timestamp': '2025-10-01 04:37:27.134711', 'step': 19807, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:27.168642', 'step': 19807, 'epoch': 3} {'type': 'loss', 'content': 0.0675744041800499, 'timestamp': '2025-10-01 04:37:27.193266', 'step': 19808, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:27.226441', 'step': 19808, 'epoch': 3} {'type': 'loss', 'content': 0.11142401397228241, 'timestamp': '2025-10-01 04:37:27.228671', 'step': 19809, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:27.264050', 'step': 19809, 'epoch': 3} {'type': 'loss', 'content': 0.06773357838392258, 'timestamp': '2025-10-01 04:37:27.266816', 'step': 19810, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:37:27.330482', 'step': 19810, 'epoch': 3} {'type': 'loss', 'content': 0.1276516318321228, 'timestamp': '2025-10-01 04:37:27.332664', 'step': 19811, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:27.368961', 'step': 19811, 'epoch': 3} {'type': 'loss', 'content': 0.0391802117228508, 'timestamp': '2025-10-01 04:37:27.393230', 'step': 19812, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:27.426193', 'step': 19812, 'epoch': 3} {'type': 'loss', 'content': 0.1284414827823639, 'timestamp': '2025-10-01 04:37:27.428518', 'step': 19813, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:27.462250', 'step': 19813, 'epoch': 3} {'type': 'loss', 'content': 0.13472482562065125, 'timestamp': '2025-10-01 04:37:27.465217', 'step': 19814, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:37:27.499927', 'step': 19814, 'epoch': 3} {'type': 'loss', 'content': 0.07458851486444473, 'timestamp': '2025-10-01 04:37:27.502408', 'step': 19815, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:27.537414', 'step': 19815, 'epoch': 3} {'type': 'loss', 'content': 0.04185369610786438, 'timestamp': '2025-10-01 04:37:27.561110', 'step': 19816, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:37:27.596933', 'step': 19816, 'epoch': 3} {'type': 'loss', 'content': 0.06483907997608185, 'timestamp': '2025-10-01 04:37:27.599226', 'step': 19817, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:27.634890', 'step': 19817, 'epoch': 3} {'type': 'loss', 'content': 0.06428790092468262, 'timestamp': '2025-10-01 04:37:27.637186', 'step': 19818, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:27.693658', 'step': 19818, 'epoch': 3} {'type': 'loss', 'content': 0.1460234522819519, 'timestamp': '2025-10-01 04:37:27.695963', 'step': 19819, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:37:27.730510', 'step': 19819, 'epoch': 3} {'type': 'loss', 'content': 0.10768503695726395, 'timestamp': '2025-10-01 04:37:27.756393', 'step': 19820, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:27.790300', 'step': 19820, 'epoch': 3} {'type': 'loss', 'content': 0.06720057129859924, 'timestamp': '2025-10-01 04:37:27.792537', 'step': 19821, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:37:27.843799', 'step': 19821, 'epoch': 3} {'type': 'loss', 'content': 0.07663799077272415, 'timestamp': '2025-10-01 04:37:27.846100', 'step': 19822, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:27.884919', 'step': 19822, 'epoch': 3} {'type': 'loss', 'content': 0.02568047307431698, 'timestamp': '2025-10-01 04:37:27.888513', 'step': 19823, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:27.925302', 'step': 19823, 'epoch': 3} {'type': 'loss', 'content': 0.04018203169107437, 'timestamp': '2025-10-01 04:37:27.949766', 'step': 19824, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:27.999574', 'step': 19824, 'epoch': 3} {'type': 'loss', 'content': 0.021042486652731895, 'timestamp': '2025-10-01 04:37:28.001789', 'step': 19825, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:37:28.048497', 'step': 19825, 'epoch': 3} {'type': 'loss', 'content': 0.07524669170379639, 'timestamp': '2025-10-01 04:37:28.050764', 'step': 19826, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:28.094350', 'step': 19826, 'epoch': 3} {'type': 'loss', 'content': 0.05796283483505249, 'timestamp': '2025-10-01 04:37:28.096596', 'step': 19827, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:28.131618', 'step': 19827, 'epoch': 3} {'type': 'loss', 'content': 0.13604292273521423, 'timestamp': '2025-10-01 04:37:28.155419', 'step': 19828, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:37:28.189361', 'step': 19828, 'epoch': 3} {'type': 'loss', 'content': 0.03884733468294144, 'timestamp': '2025-10-01 04:37:28.191665', 'step': 19829, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:37:28.225800', 'step': 19829, 'epoch': 3} {'type': 'loss', 'content': 0.07887133210897446, 'timestamp': '2025-10-01 04:37:28.230030', 'step': 19830, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:28.264574', 'step': 19830, 'epoch': 3} {'type': 'loss', 'content': 0.10629856586456299, 'timestamp': '2025-10-01 04:37:28.266927', 'step': 19831, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:28.299686', 'step': 19831, 'epoch': 3} {'type': 'loss', 'content': 0.09191279113292694, 'timestamp': '2025-10-01 04:37:28.323413', 'step': 19832, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:28.368822', 'step': 19832, 'epoch': 3} {'type': 'loss', 'content': 0.07150459289550781, 'timestamp': '2025-10-01 04:37:28.371167', 'step': 19833, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:28.407602', 'step': 19833, 'epoch': 3} {'type': 'loss', 'content': 0.2169264853000641, 'timestamp': '2025-10-01 04:37:28.409816', 'step': 19834, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:37:28.445564', 'step': 19834, 'epoch': 3} {'type': 'loss', 'content': 0.07766387611627579, 'timestamp': '2025-10-01 04:37:28.447920', 'step': 19835, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:28.482239', 'step': 19835, 'epoch': 3} {'type': 'loss', 'content': 0.058964427560567856, 'timestamp': '2025-10-01 04:37:28.505978', 'step': 19836, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:28.541331', 'step': 19836, 'epoch': 3} {'type': 'loss', 'content': 0.10326635092496872, 'timestamp': '2025-10-01 04:37:28.543503', 'step': 19837, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:28.575845', 'step': 19837, 'epoch': 3} {'type': 'loss', 'content': 0.025879280641674995, 'timestamp': '2025-10-01 04:37:28.578253', 'step': 19838, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:37:28.613613', 'step': 19838, 'epoch': 3} {'type': 'loss', 'content': 0.0779336467385292, 'timestamp': '2025-10-01 04:37:28.615835', 'step': 19839, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:28.648551', 'step': 19839, 'epoch': 3} {'type': 'loss', 'content': 0.09183801710605621, 'timestamp': '2025-10-01 04:37:28.672389', 'step': 19840, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:28.707031', 'step': 19840, 'epoch': 3} {'type': 'loss', 'content': 0.03949581831693649, 'timestamp': '2025-10-01 04:37:28.709280', 'step': 19841, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:28.744996', 'step': 19841, 'epoch': 3} {'type': 'loss', 'content': 0.05439822003245354, 'timestamp': '2025-10-01 04:37:28.747246', 'step': 19842, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:28.781389', 'step': 19842, 'epoch': 3} {'type': 'loss', 'content': 0.07152317464351654, 'timestamp': '2025-10-01 04:37:28.784051', 'step': 19843, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:37:28.822536', 'step': 19843, 'epoch': 3} {'type': 'loss', 'content': 0.04567285254597664, 'timestamp': '2025-10-01 04:37:28.846445', 'step': 19844, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:37:28.892686', 'step': 19844, 'epoch': 3} {'type': 'loss', 'content': 0.09299012273550034, 'timestamp': '2025-10-01 04:37:28.894863', 'step': 19845, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:28.929066', 'step': 19845, 'epoch': 3} {'type': 'loss', 'content': 0.10170533508062363, 'timestamp': '2025-10-01 04:37:28.932159', 'step': 19846, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:37:28.967955', 'step': 19846, 'epoch': 3} {'type': 'loss', 'content': 0.1601892113685608, 'timestamp': '2025-10-01 04:37:28.970340', 'step': 19847, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:37:29.004169', 'step': 19847, 'epoch': 3} {'type': 'loss', 'content': 0.10578134655952454, 'timestamp': '2025-10-01 04:37:29.027574', 'step': 19848, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:29.064519', 'step': 19848, 'epoch': 3} {'type': 'loss', 'content': 0.05422321707010269, 'timestamp': '2025-10-01 04:37:29.066545', 'step': 19849, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:37:29.102168', 'step': 19849, 'epoch': 3} {'type': 'loss', 'content': 0.10946623980998993, 'timestamp': '2025-10-01 04:37:29.104778', 'step': 19850, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:37:29.139431', 'step': 19850, 'epoch': 3} {'type': 'loss', 'content': 0.06550120562314987, 'timestamp': '2025-10-01 04:37:29.141406', 'step': 19851, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:37:29.174150', 'step': 19851, 'epoch': 3} {'type': 'loss', 'content': 0.09300930052995682, 'timestamp': '2025-10-01 04:37:29.197651', 'step': 19852, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:29.246051', 'step': 19852, 'epoch': 3} {'type': 'loss', 'content': 0.020755479112267494, 'timestamp': '2025-10-01 04:37:29.248090', 'step': 19853, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:29.296848', 'step': 19853, 'epoch': 3} {'type': 'loss', 'content': 0.1269461065530777, 'timestamp': '2025-10-01 04:37:29.298904', 'step': 19854, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:29.335828', 'step': 19854, 'epoch': 3} {'type': 'loss', 'content': 0.07808733731508255, 'timestamp': '2025-10-01 04:37:29.337988', 'step': 19855, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:29.385086', 'step': 19855, 'epoch': 3} {'type': 'loss', 'content': 0.06234321370720863, 'timestamp': '2025-10-01 04:37:29.408711', 'step': 19856, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:29.443778', 'step': 19856, 'epoch': 3} {'type': 'loss', 'content': 0.14277449250221252, 'timestamp': '2025-10-01 04:37:29.445929', 'step': 19857, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:29.491540', 'step': 19857, 'epoch': 3} {'type': 'loss', 'content': 0.0815039649605751, 'timestamp': '2025-10-01 04:37:29.493750', 'step': 19858, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:37:29.527205', 'step': 19858, 'epoch': 3} {'type': 'loss', 'content': 0.11011992394924164, 'timestamp': '2025-10-01 04:37:29.529251', 'step': 19859, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:37:29.567435', 'step': 19859, 'epoch': 3} {'type': 'loss', 'content': 0.05978052690625191, 'timestamp': '2025-10-01 04:37:29.591109', 'step': 19860, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:29.626604', 'step': 19860, 'epoch': 3} {'type': 'loss', 'content': 0.0667153149843216, 'timestamp': '2025-10-01 04:37:29.628754', 'step': 19861, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:29.665543', 'step': 19861, 'epoch': 3} {'type': 'loss', 'content': 0.10491303354501724, 'timestamp': '2025-10-01 04:37:29.667591', 'step': 19862, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:29.727912', 'step': 19862, 'epoch': 3} {'type': 'loss', 'content': 0.018318619579076767, 'timestamp': '2025-10-01 04:37:29.730065', 'step': 19863, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:37:29.791515', 'step': 19863, 'epoch': 3} {'type': 'loss', 'content': 0.15568998456001282, 'timestamp': '2025-10-01 04:37:29.815124', 'step': 19864, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:29.861974', 'step': 19864, 'epoch': 3} {'type': 'loss', 'content': 0.1045975461602211, 'timestamp': '2025-10-01 04:37:29.864043', 'step': 19865, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:37:29.916607', 'step': 19865, 'epoch': 3} {'type': 'loss', 'content': 0.10053813457489014, 'timestamp': '2025-10-01 04:37:29.919580', 'step': 19866, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:37:29.959685', 'step': 19866, 'epoch': 3} {'type': 'loss', 'content': 0.15254206955432892, 'timestamp': '2025-10-01 04:37:29.962112', 'step': 19867, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:37:30.009417', 'step': 19867, 'epoch': 3} {'type': 'loss', 'content': 0.03152266889810562, 'timestamp': '2025-10-01 04:37:30.032897', 'step': 19868, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:30.076299', 'step': 19868, 'epoch': 3} {'type': 'loss', 'content': 0.08407201617956161, 'timestamp': '2025-10-01 04:37:30.079082', 'step': 19869, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:30.139484', 'step': 19869, 'epoch': 3} {'type': 'loss', 'content': 0.07984593510627747, 'timestamp': '2025-10-01 04:37:30.142220', 'step': 19870, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:30.176396', 'step': 19870, 'epoch': 3} {'type': 'loss', 'content': 0.043346501886844635, 'timestamp': '2025-10-01 04:37:30.182138', 'step': 19871, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:37:30.255026', 'step': 19871, 'epoch': 3} {'type': 'loss', 'content': 0.07792685180902481, 'timestamp': '2025-10-01 04:37:30.278718', 'step': 19872, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:37:30.319624', 'step': 19872, 'epoch': 3} {'type': 'loss', 'content': 0.10853742808103561, 'timestamp': '2025-10-01 04:37:30.321806', 'step': 19873, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:30.370706', 'step': 19873, 'epoch': 3} {'type': 'loss', 'content': 0.07676946371793747, 'timestamp': '2025-10-01 04:37:30.373446', 'step': 19874, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:37:30.409685', 'step': 19874, 'epoch': 3} {'type': 'loss', 'content': 0.07487992197275162, 'timestamp': '2025-10-01 04:37:30.413973', 'step': 19875, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:37:30.448852', 'step': 19875, 'epoch': 3} {'type': 'loss', 'content': 0.09245812892913818, 'timestamp': '2025-10-01 04:37:30.472499', 'step': 19876, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:37:30.522315', 'step': 19876, 'epoch': 3} {'type': 'loss', 'content': 0.0621829554438591, 'timestamp': '2025-10-01 04:37:30.524328', 'step': 19877, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:30.585740', 'step': 19877, 'epoch': 3} {'type': 'loss', 'content': 0.072001151740551, 'timestamp': '2025-10-01 04:37:30.587628', 'step': 19878, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:30.624185', 'step': 19878, 'epoch': 3} {'type': 'loss', 'content': 0.10445114225149155, 'timestamp': '2025-10-01 04:37:30.626058', 'step': 19879, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:30.685702', 'step': 19879, 'epoch': 3} {'type': 'loss', 'content': 0.06100774556398392, 'timestamp': '2025-10-01 04:37:30.709480', 'step': 19880, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:30.762444', 'step': 19880, 'epoch': 3} {'type': 'loss', 'content': 0.06240793690085411, 'timestamp': '2025-10-01 04:37:30.764472', 'step': 19881, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:37:30.834162', 'step': 19881, 'epoch': 3} {'type': 'loss', 'content': 0.15906362235546112, 'timestamp': '2025-10-01 04:37:30.838345', 'step': 19882, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:30.890372', 'step': 19882, 'epoch': 3} {'type': 'loss', 'content': 0.037016380578279495, 'timestamp': '2025-10-01 04:37:30.895558', 'step': 19883, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:37:30.954600', 'step': 19883, 'epoch': 3} {'type': 'loss', 'content': 0.056695785373449326, 'timestamp': '2025-10-01 04:37:30.978097', 'step': 19884, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:37:31.031273', 'step': 19884, 'epoch': 3} {'type': 'loss', 'content': 0.0830850899219513, 'timestamp': '2025-10-01 04:37:31.033520', 'step': 19885, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:31.087506', 'step': 19885, 'epoch': 3} {'type': 'loss', 'content': 0.1277981549501419, 'timestamp': '2025-10-01 04:37:31.090063', 'step': 19886, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:37:31.147920', 'step': 19886, 'epoch': 3} {'type': 'loss', 'content': 0.044162567704916, 'timestamp': '2025-10-01 04:37:31.150403', 'step': 19887, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:31.200446', 'step': 19887, 'epoch': 3} {'type': 'loss', 'content': 0.02384280413389206, 'timestamp': '2025-10-01 04:37:31.239180', 'step': 19888, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:37:31.284457', 'step': 19888, 'epoch': 3} {'type': 'loss', 'content': 0.1566622108221054, 'timestamp': '2025-10-01 04:37:31.286628', 'step': 19889, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:31.342379', 'step': 19889, 'epoch': 3} {'type': 'loss', 'content': 0.07219499349594116, 'timestamp': '2025-10-01 04:37:31.344613', 'step': 19890, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:31.395285', 'step': 19890, 'epoch': 3} {'type': 'loss', 'content': 0.06395959109067917, 'timestamp': '2025-10-01 04:37:31.397250', 'step': 19891, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:31.446829', 'step': 19891, 'epoch': 3} {'type': 'loss', 'content': 0.009077337570488453, 'timestamp': '2025-10-01 04:37:31.471942', 'step': 19892, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:37:31.531897', 'step': 19892, 'epoch': 3} {'type': 'loss', 'content': 0.08772068470716476, 'timestamp': '2025-10-01 04:37:31.534148', 'step': 19893, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:31.581864', 'step': 19893, 'epoch': 3} {'type': 'loss', 'content': 0.0880599394440651, 'timestamp': '2025-10-01 04:37:31.585524', 'step': 19894, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:31.630469', 'step': 19894, 'epoch': 3} {'type': 'loss', 'content': 0.051061030477285385, 'timestamp': '2025-10-01 04:37:31.635016', 'step': 19895, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:37:31.687942', 'step': 19895, 'epoch': 3} {'type': 'loss', 'content': 0.07131477445363998, 'timestamp': '2025-10-01 04:37:31.711410', 'step': 19896, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:31.758449', 'step': 19896, 'epoch': 3} {'type': 'loss', 'content': 0.07550755143165588, 'timestamp': '2025-10-01 04:37:31.761449', 'step': 19897, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:37:31.805594', 'step': 19897, 'epoch': 3} {'type': 'loss', 'content': 0.040611304342746735, 'timestamp': '2025-10-01 04:37:31.811396', 'step': 19898, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:37:31.873997', 'step': 19898, 'epoch': 3} {'type': 'loss', 'content': 0.2032250314950943, 'timestamp': '2025-10-01 04:37:31.876496', 'step': 19899, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:37:31.941830', 'step': 19899, 'epoch': 3} {'type': 'loss', 'content': 0.07683771103620529, 'timestamp': '2025-10-01 04:37:31.965663', 'step': 19900, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:37:32.021526', 'step': 19900, 'epoch': 3} {'type': 'loss', 'content': 0.1294889599084854, 'timestamp': '2025-10-01 04:37:32.024045', 'step': 19901, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:37:32.064229', 'step': 19901, 'epoch': 3} {'type': 'loss', 'content': 0.051041580736637115, 'timestamp': '2025-10-01 04:37:32.067089', 'step': 19902, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:37:32.109159', 'step': 19902, 'epoch': 3} {'type': 'loss', 'content': 0.05461956560611725, 'timestamp': '2025-10-01 04:37:32.111451', 'step': 19903, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:32.144146', 'step': 19903, 'epoch': 3} {'type': 'loss', 'content': 0.10037986189126968, 'timestamp': '2025-10-01 04:37:32.167927', 'step': 19904, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:37:32.204955', 'step': 19904, 'epoch': 3} {'type': 'loss', 'content': 0.06359604001045227, 'timestamp': '2025-10-01 04:37:32.207175', 'step': 19905, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:37:32.242381', 'step': 19905, 'epoch': 3} {'type': 'loss', 'content': 0.09647958725690842, 'timestamp': '2025-10-01 04:37:32.245190', 'step': 19906, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:37:32.278216', 'step': 19906, 'epoch': 3} {'type': 'loss', 'content': 0.11848462373018265, 'timestamp': '2025-10-01 04:37:32.291847', 'step': 19907, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:37:32.340316', 'step': 19907, 'epoch': 3} {'type': 'loss', 'content': 0.08069758862257004, 'timestamp': '2025-10-01 04:37:32.363842', 'step': 19908, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:32.417979', 'step': 19908, 'epoch': 3} {'type': 'loss', 'content': 0.04420880228281021, 'timestamp': '2025-10-01 04:37:32.419937', 'step': 19909, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:32.454410', 'step': 19909, 'epoch': 3} {'type': 'loss', 'content': 0.0380115769803524, 'timestamp': '2025-10-01 04:37:32.456887', 'step': 19910, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:32.513518', 'step': 19910, 'epoch': 3} {'type': 'loss', 'content': 0.13915766775608063, 'timestamp': '2025-10-01 04:37:32.515444', 'step': 19911, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:37:32.568564', 'step': 19911, 'epoch': 3} {'type': 'loss', 'content': 0.09820351004600525, 'timestamp': '2025-10-01 04:37:32.592123', 'step': 19912, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:32.641798', 'step': 19912, 'epoch': 3} {'type': 'loss', 'content': 0.08443834632635117, 'timestamp': '2025-10-01 04:37:32.644735', 'step': 19913, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:32.680143', 'step': 19913, 'epoch': 3} {'type': 'loss', 'content': 0.1487663835287094, 'timestamp': '2025-10-01 04:37:32.682224', 'step': 19914, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:32.715517', 'step': 19914, 'epoch': 3} {'type': 'loss', 'content': 0.1119595468044281, 'timestamp': '2025-10-01 04:37:32.717672', 'step': 19915, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:37:32.753128', 'step': 19915, 'epoch': 3} {'type': 'loss', 'content': 0.06092457100749016, 'timestamp': '2025-10-01 04:37:32.776791', 'step': 19916, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:32.822907', 'step': 19916, 'epoch': 3} {'type': 'loss', 'content': 0.046743243932724, 'timestamp': '2025-10-01 04:37:32.825120', 'step': 19917, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:37:32.860614', 'step': 19917, 'epoch': 3} {'type': 'loss', 'content': 0.06834726780653, 'timestamp': '2025-10-01 04:37:32.863070', 'step': 19918, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:32.896384', 'step': 19918, 'epoch': 3} {'type': 'loss', 'content': 0.1330285519361496, 'timestamp': '2025-10-01 04:37:32.898335', 'step': 19919, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-10-01 04:37:32.947180', 'step': 19919, 'epoch': 3} {'type': 'loss', 'content': 0.04961855709552765, 'timestamp': '2025-10-01 04:37:32.972886', 'step': 19920, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:33.020204', 'step': 19920, 'epoch': 3} {'type': 'loss', 'content': 0.06666672229766846, 'timestamp': '2025-10-01 04:37:33.022173', 'step': 19921, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:33.056964', 'step': 19921, 'epoch': 3} {'type': 'loss', 'content': 0.05538748577237129, 'timestamp': '2025-10-01 04:37:33.058838', 'step': 19922, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:33.093054', 'step': 19922, 'epoch': 3} {'type': 'loss', 'content': 0.11327175050973892, 'timestamp': '2025-10-01 04:37:33.097136', 'step': 19923, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:37:33.144795', 'step': 19923, 'epoch': 3} {'type': 'loss', 'content': 0.07423308491706848, 'timestamp': '2025-10-01 04:37:33.168611', 'step': 19924, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:37:33.201808', 'step': 19924, 'epoch': 3} {'type': 'loss', 'content': 0.025558805093169212, 'timestamp': '2025-10-01 04:37:33.203928', 'step': 19925, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:33.253843', 'step': 19925, 'epoch': 3} {'type': 'loss', 'content': 0.055390242487192154, 'timestamp': '2025-10-01 04:37:33.255655', 'step': 19926, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:37:33.291032', 'step': 19926, 'epoch': 3} {'type': 'loss', 'content': 0.08404365926980972, 'timestamp': '2025-10-01 04:37:33.293008', 'step': 19927, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:37:33.328700', 'step': 19927, 'epoch': 3} {'type': 'loss', 'content': 0.07273570448160172, 'timestamp': '2025-10-01 04:37:33.352192', 'step': 19928, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:37:33.401950', 'step': 19928, 'epoch': 3} {'type': 'loss', 'content': 0.05112771317362785, 'timestamp': '2025-10-01 04:37:33.404189', 'step': 19929, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:33.439406', 'step': 19929, 'epoch': 3} {'type': 'loss', 'content': 0.05892058461904526, 'timestamp': '2025-10-01 04:37:33.441948', 'step': 19930, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:37:33.492595', 'step': 19930, 'epoch': 3} {'type': 'loss', 'content': 0.08876209706068039, 'timestamp': '2025-10-01 04:37:33.494725', 'step': 19931, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:33.530799', 'step': 19931, 'epoch': 3} {'type': 'loss', 'content': 0.03274796903133392, 'timestamp': '2025-10-01 04:37:33.555245', 'step': 19932, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:37:33.595619', 'step': 19932, 'epoch': 3} {'type': 'loss', 'content': 0.09960295259952545, 'timestamp': '2025-10-01 04:37:33.597886', 'step': 19933, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:33.639224', 'step': 19933, 'epoch': 3} {'type': 'loss', 'content': 0.07512703537940979, 'timestamp': '2025-10-01 04:37:33.645777', 'step': 19934, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:33.682790', 'step': 19934, 'epoch': 3} {'type': 'loss', 'content': 0.039627693593502045, 'timestamp': '2025-10-01 04:37:33.684829', 'step': 19935, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:33.721974', 'step': 19935, 'epoch': 3} {'type': 'loss', 'content': 0.06635305285453796, 'timestamp': '2025-10-01 04:37:33.745989', 'step': 19936, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:33.786060', 'step': 19936, 'epoch': 3} {'type': 'loss', 'content': 0.15087543427944183, 'timestamp': '2025-10-01 04:37:33.788798', 'step': 19937, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:33.824092', 'step': 19937, 'epoch': 3} {'type': 'loss', 'content': 0.06023561954498291, 'timestamp': '2025-10-01 04:37:33.826192', 'step': 19938, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:33.872314', 'step': 19938, 'epoch': 3} {'type': 'loss', 'content': 0.07329462468624115, 'timestamp': '2025-10-01 04:37:33.874705', 'step': 19939, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:37:33.911052', 'step': 19939, 'epoch': 3} {'type': 'loss', 'content': 0.07182267308235168, 'timestamp': '2025-10-01 04:37:33.939354', 'step': 19940, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:37:33.979059', 'step': 19940, 'epoch': 3} {'type': 'loss', 'content': 0.02288810908794403, 'timestamp': '2025-10-01 04:37:33.981291', 'step': 19941, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:34.026296', 'step': 19941, 'epoch': 3} {'type': 'loss', 'content': 0.09165086597204208, 'timestamp': '2025-10-01 04:37:34.041876', 'step': 19942, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:34.078717', 'step': 19942, 'epoch': 3} {'type': 'loss', 'content': 0.05791604891419411, 'timestamp': '2025-10-01 04:37:34.081033', 'step': 19943, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:37:34.116868', 'step': 19943, 'epoch': 3} {'type': 'loss', 'content': 0.06213662028312683, 'timestamp': '2025-10-01 04:37:34.140834', 'step': 19944, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:37:34.192249', 'step': 19944, 'epoch': 3} {'type': 'loss', 'content': 0.1382230967283249, 'timestamp': '2025-10-01 04:37:34.194639', 'step': 19945, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:37:34.228946', 'step': 19945, 'epoch': 3} {'type': 'loss', 'content': 0.09890180081129074, 'timestamp': '2025-10-01 04:37:34.231802', 'step': 19946, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:34.268059', 'step': 19946, 'epoch': 3} {'type': 'loss', 'content': 0.13615375757217407, 'timestamp': '2025-10-01 04:37:34.270341', 'step': 19947, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:37:34.330059', 'step': 19947, 'epoch': 3} {'type': 'loss', 'content': 0.07066161185503006, 'timestamp': '2025-10-01 04:37:34.357039', 'step': 19948, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:37:34.404169', 'step': 19948, 'epoch': 3} {'type': 'loss', 'content': 0.07426974922418594, 'timestamp': '2025-10-01 04:37:34.406128', 'step': 19949, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:34.439531', 'step': 19949, 'epoch': 3} {'type': 'loss', 'content': 0.05899849161505699, 'timestamp': '2025-10-01 04:37:34.446557', 'step': 19950, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:37:34.508289', 'step': 19950, 'epoch': 3} {'type': 'loss', 'content': 0.08377712965011597, 'timestamp': '2025-10-01 04:37:34.523805', 'step': 19951, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:34.586596', 'step': 19951, 'epoch': 3} {'type': 'loss', 'content': 0.06555983424186707, 'timestamp': '2025-10-01 04:37:34.609977', 'step': 19952, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:37:34.647060', 'step': 19952, 'epoch': 3} {'type': 'loss', 'content': 0.04257955029606819, 'timestamp': '2025-10-01 04:37:34.648954', 'step': 19953, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:34.701780', 'step': 19953, 'epoch': 3} {'type': 'loss', 'content': 0.0785815492272377, 'timestamp': '2025-10-01 04:37:34.703631', 'step': 19954, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:34.746268', 'step': 19954, 'epoch': 3} {'type': 'loss', 'content': 0.06416630744934082, 'timestamp': '2025-10-01 04:37:34.748260', 'step': 19955, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:34.790082', 'step': 19955, 'epoch': 3} {'type': 'loss', 'content': 0.09739310294389725, 'timestamp': '2025-10-01 04:37:34.817423', 'step': 19956, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:37:34.861459', 'step': 19956, 'epoch': 3} {'type': 'loss', 'content': 0.0798027440905571, 'timestamp': '2025-10-01 04:37:34.863530', 'step': 19957, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:37:34.912091', 'step': 19957, 'epoch': 3} {'type': 'loss', 'content': 0.06329412013292313, 'timestamp': '2025-10-01 04:37:34.914295', 'step': 19958, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:34.952210', 'step': 19958, 'epoch': 3} {'type': 'loss', 'content': 0.04746945574879646, 'timestamp': '2025-10-01 04:37:34.954698', 'step': 19959, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:35.004345', 'step': 19959, 'epoch': 3} {'type': 'loss', 'content': 0.09451597183942795, 'timestamp': '2025-10-01 04:37:35.027848', 'step': 19960, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:35.062338', 'step': 19960, 'epoch': 3} {'type': 'loss', 'content': 0.03482280671596527, 'timestamp': '2025-10-01 04:37:35.064311', 'step': 19961, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:37:35.099459', 'step': 19961, 'epoch': 3} {'type': 'loss', 'content': 0.11260495334863663, 'timestamp': '2025-10-01 04:37:35.101604', 'step': 19962, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:35.136921', 'step': 19962, 'epoch': 3} {'type': 'loss', 'content': 0.060727376490831375, 'timestamp': '2025-10-01 04:37:35.138805', 'step': 19963, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:35.191201', 'step': 19963, 'epoch': 3} {'type': 'loss', 'content': 0.06904419511556625, 'timestamp': '2025-10-01 04:37:35.214510', 'step': 19964, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:35.262515', 'step': 19964, 'epoch': 3} {'type': 'loss', 'content': 0.06499508768320084, 'timestamp': '2025-10-01 04:37:35.265318', 'step': 19965, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:37:35.317224', 'step': 19965, 'epoch': 3} {'type': 'loss', 'content': 0.07837356626987457, 'timestamp': '2025-10-01 04:37:35.320093', 'step': 19966, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:35.361884', 'step': 19966, 'epoch': 3} {'type': 'loss', 'content': 0.09164196252822876, 'timestamp': '2025-10-01 04:37:35.364271', 'step': 19967, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:35.400303', 'step': 19967, 'epoch': 3} {'type': 'loss', 'content': 0.04887909069657326, 'timestamp': '2025-10-01 04:37:35.423689', 'step': 19968, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:37:35.462228', 'step': 19968, 'epoch': 3} {'type': 'loss', 'content': 0.0443727970123291, 'timestamp': '2025-10-01 04:37:35.464349', 'step': 19969, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:35.528160', 'step': 19969, 'epoch': 3} {'type': 'loss', 'content': 0.08339940011501312, 'timestamp': '2025-10-01 04:37:35.530623', 'step': 19970, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:35.572729', 'step': 19970, 'epoch': 3} {'type': 'loss', 'content': 0.033662501722574234, 'timestamp': '2025-10-01 04:37:35.575505', 'step': 19971, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:35.657589', 'step': 19971, 'epoch': 3} {'type': 'loss', 'content': 0.09941818565130234, 'timestamp': '2025-10-01 04:37:35.688133', 'step': 19972, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:35.730965', 'step': 19972, 'epoch': 3} {'type': 'loss', 'content': 0.04479029029607773, 'timestamp': '2025-10-01 04:37:35.733103', 'step': 19973, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:37:35.768913', 'step': 19973, 'epoch': 3} {'type': 'loss', 'content': 0.04680236801505089, 'timestamp': '2025-10-01 04:37:35.771150', 'step': 19974, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:37:35.806505', 'step': 19974, 'epoch': 3} {'type': 'loss', 'content': 0.0767827183008194, 'timestamp': '2025-10-01 04:37:35.809691', 'step': 19975, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:35.849239', 'step': 19975, 'epoch': 3} {'type': 'loss', 'content': 0.041076935827732086, 'timestamp': '2025-10-01 04:37:35.872774', 'step': 19976, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:37:35.908223', 'step': 19976, 'epoch': 3} {'type': 'loss', 'content': 0.17323537170886993, 'timestamp': '2025-10-01 04:37:35.911787', 'step': 19977, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:35.965216', 'step': 19977, 'epoch': 3} {'type': 'loss', 'content': 0.060846079140901566, 'timestamp': '2025-10-01 04:37:35.967550', 'step': 19978, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:37:36.011353', 'step': 19978, 'epoch': 3} {'type': 'loss', 'content': 0.03852739930152893, 'timestamp': '2025-10-01 04:37:36.014345', 'step': 19979, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:37:36.049883', 'step': 19979, 'epoch': 3} {'type': 'loss', 'content': 0.05095074325799942, 'timestamp': '2025-10-01 04:37:36.073577', 'step': 19980, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:36.116166', 'step': 19980, 'epoch': 3} {'type': 'loss', 'content': 0.06853276491165161, 'timestamp': '2025-10-01 04:37:36.118151', 'step': 19981, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:36.163938', 'step': 19981, 'epoch': 3} {'type': 'loss', 'content': 0.04149763658642769, 'timestamp': '2025-10-01 04:37:36.168462', 'step': 19982, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:36.203195', 'step': 19982, 'epoch': 3} {'type': 'loss', 'content': 0.0411592535674572, 'timestamp': '2025-10-01 04:37:36.205372', 'step': 19983, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:36.254105', 'step': 19983, 'epoch': 3} {'type': 'loss', 'content': 0.0823034942150116, 'timestamp': '2025-10-01 04:37:36.277596', 'step': 19984, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:36.310852', 'step': 19984, 'epoch': 3} {'type': 'loss', 'content': 0.08118396997451782, 'timestamp': '2025-10-01 04:37:36.313610', 'step': 19985, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:36.348683', 'step': 19985, 'epoch': 3} {'type': 'loss', 'content': 0.09489578753709793, 'timestamp': '2025-10-01 04:37:36.351495', 'step': 19986, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:36.384132', 'step': 19986, 'epoch': 3} {'type': 'loss', 'content': 0.06435403972864151, 'timestamp': '2025-10-01 04:37:36.386417', 'step': 19987, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:36.423009', 'step': 19987, 'epoch': 3} {'type': 'loss', 'content': 0.019524946808815002, 'timestamp': '2025-10-01 04:37:36.446771', 'step': 19988, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:37:36.492832', 'step': 19988, 'epoch': 3} {'type': 'loss', 'content': 0.030282828956842422, 'timestamp': '2025-10-01 04:37:36.494914', 'step': 19989, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:36.529957', 'step': 19989, 'epoch': 3} {'type': 'loss', 'content': 0.05385474115610123, 'timestamp': '2025-10-01 04:37:36.532217', 'step': 19990, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:36.569972', 'step': 19990, 'epoch': 3} {'type': 'loss', 'content': 0.06410548835992813, 'timestamp': '2025-10-01 04:37:36.572027', 'step': 19991, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:36.606532', 'step': 19991, 'epoch': 3} {'type': 'loss', 'content': 0.0265498086810112, 'timestamp': '2025-10-01 04:37:36.629993', 'step': 19992, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:36.664324', 'step': 19992, 'epoch': 3} {'type': 'loss', 'content': 0.036616526544094086, 'timestamp': '2025-10-01 04:37:36.666741', 'step': 19993, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:37:36.701185', 'step': 19993, 'epoch': 3} {'type': 'loss', 'content': 0.06660517305135727, 'timestamp': '2025-10-01 04:37:36.703200', 'step': 19994, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:36.750898', 'step': 19994, 'epoch': 3} {'type': 'loss', 'content': 0.05623343959450722, 'timestamp': '2025-10-01 04:37:36.752993', 'step': 19995, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:36.788586', 'step': 19995, 'epoch': 3} {'type': 'loss', 'content': 0.04331723973155022, 'timestamp': '2025-10-01 04:37:36.813469', 'step': 19996, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:36.849182', 'step': 19996, 'epoch': 3} {'type': 'loss', 'content': 0.04751696065068245, 'timestamp': '2025-10-01 04:37:36.851305', 'step': 19997, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:36.887233', 'step': 19997, 'epoch': 3} {'type': 'loss', 'content': 0.09342584013938904, 'timestamp': '2025-10-01 04:37:36.889297', 'step': 19998, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:37:36.940096', 'step': 19998, 'epoch': 3} {'type': 'loss', 'content': 0.0755559653043747, 'timestamp': '2025-10-01 04:37:36.947859', 'step': 19999, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:36.988813', 'step': 19999, 'epoch': 3} {'type': 'loss', 'content': 0.007604194339364767, 'timestamp': '2025-10-01 04:37:37.012821', 'step': 20000, 'epoch': 3} {'type': 'info', 'content': 'Checkpoint saved at step 20000', 'timestamp': '2025-10-01 04:37:41.970294', 'step': 20000, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:37:42.017886', 'step': 20000, 'epoch': 3} {'type': 'loss', 'content': 0.08222001791000366, 'timestamp': '2025-10-01 04:37:42.020277', 'step': 20001, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:42.057514', 'step': 20001, 'epoch': 3} {'type': 'loss', 'content': 0.1376924067735672, 'timestamp': '2025-10-01 04:37:42.059922', 'step': 20002, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:37:42.095734', 'step': 20002, 'epoch': 3} {'type': 'loss', 'content': 0.06926128268241882, 'timestamp': '2025-10-01 04:37:42.097996', 'step': 20003, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:42.132462', 'step': 20003, 'epoch': 3} {'type': 'loss', 'content': 0.03524264320731163, 'timestamp': '2025-10-01 04:37:42.156161', 'step': 20004, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:42.189339', 'step': 20004, 'epoch': 3} {'type': 'loss', 'content': 0.04211057350039482, 'timestamp': '2025-10-01 04:37:42.191484', 'step': 20005, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:42.225078', 'step': 20005, 'epoch': 3} {'type': 'loss', 'content': 0.09886667877435684, 'timestamp': '2025-10-01 04:37:42.227229', 'step': 20006, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:37:42.276335', 'step': 20006, 'epoch': 3} {'type': 'loss', 'content': 0.12231655418872833, 'timestamp': '2025-10-01 04:37:42.278585', 'step': 20007, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:42.313963', 'step': 20007, 'epoch': 3} {'type': 'loss', 'content': 0.0789383128285408, 'timestamp': '2025-10-01 04:37:42.337575', 'step': 20008, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:37:42.374056', 'step': 20008, 'epoch': 3} {'type': 'loss', 'content': 0.0490402914583683, 'timestamp': '2025-10-01 04:37:42.376055', 'step': 20009, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:42.414780', 'step': 20009, 'epoch': 3} {'type': 'loss', 'content': 0.11804286390542984, 'timestamp': '2025-10-01 04:37:42.417735', 'step': 20010, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:42.454148', 'step': 20010, 'epoch': 3} {'type': 'loss', 'content': 0.08391433954238892, 'timestamp': '2025-10-01 04:37:42.456143', 'step': 20011, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:37:42.493261', 'step': 20011, 'epoch': 3} {'type': 'loss', 'content': 0.11074280738830566, 'timestamp': '2025-10-01 04:37:42.517489', 'step': 20012, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:42.566776', 'step': 20012, 'epoch': 3} {'type': 'loss', 'content': 0.04428081586956978, 'timestamp': '2025-10-01 04:37:42.569939', 'step': 20013, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:42.605195', 'step': 20013, 'epoch': 3} {'type': 'loss', 'content': 0.08563503623008728, 'timestamp': '2025-10-01 04:37:42.607343', 'step': 20014, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:42.641684', 'step': 20014, 'epoch': 3} {'type': 'loss', 'content': 0.1607622653245926, 'timestamp': '2025-10-01 04:37:42.644081', 'step': 20015, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:42.691809', 'step': 20015, 'epoch': 3} {'type': 'loss', 'content': 0.10496097803115845, 'timestamp': '2025-10-01 04:37:42.715615', 'step': 20016, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:42.764127', 'step': 20016, 'epoch': 3} {'type': 'loss', 'content': 0.06555227935314178, 'timestamp': '2025-10-01 04:37:42.766495', 'step': 20017, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:42.812690', 'step': 20017, 'epoch': 3} {'type': 'loss', 'content': 0.06013651564717293, 'timestamp': '2025-10-01 04:37:42.814912', 'step': 20018, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:42.847623', 'step': 20018, 'epoch': 3} {'type': 'loss', 'content': 0.13065028190612793, 'timestamp': '2025-10-01 04:37:42.850025', 'step': 20019, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:37:42.885799', 'step': 20019, 'epoch': 3} {'type': 'loss', 'content': 0.07901134341955185, 'timestamp': '2025-10-01 04:37:42.909540', 'step': 20020, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:37:42.944407', 'step': 20020, 'epoch': 3} {'type': 'loss', 'content': 0.03127110376954079, 'timestamp': '2025-10-01 04:37:42.946532', 'step': 20021, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:42.979874', 'step': 20021, 'epoch': 3} {'type': 'loss', 'content': 0.0864928662776947, 'timestamp': '2025-10-01 04:37:42.982070', 'step': 20022, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:43.015825', 'step': 20022, 'epoch': 3} {'type': 'loss', 'content': 0.13504913449287415, 'timestamp': '2025-10-01 04:37:43.018039', 'step': 20023, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:43.051479', 'step': 20023, 'epoch': 3} {'type': 'loss', 'content': 0.08684348315000534, 'timestamp': '2025-10-01 04:37:43.075219', 'step': 20024, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:43.107903', 'step': 20024, 'epoch': 3} {'type': 'loss', 'content': 0.031080743297934532, 'timestamp': '2025-10-01 04:37:43.109996', 'step': 20025, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:43.145613', 'step': 20025, 'epoch': 3} {'type': 'loss', 'content': 0.029540149495005608, 'timestamp': '2025-10-01 04:37:43.157891', 'step': 20026, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:37:43.190873', 'step': 20026, 'epoch': 3} {'type': 'loss', 'content': 0.06331253796815872, 'timestamp': '2025-10-01 04:37:43.193016', 'step': 20027, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:43.239429', 'step': 20027, 'epoch': 3} {'type': 'loss', 'content': 0.03977164626121521, 'timestamp': '2025-10-01 04:37:43.263254', 'step': 20028, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:43.310856', 'step': 20028, 'epoch': 3} {'type': 'loss', 'content': 0.12294287979602814, 'timestamp': '2025-10-01 04:37:43.313007', 'step': 20029, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:43.346606', 'step': 20029, 'epoch': 3} {'type': 'loss', 'content': 0.12878920137882233, 'timestamp': '2025-10-01 04:37:43.348855', 'step': 20030, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:43.381584', 'step': 20030, 'epoch': 3} {'type': 'loss', 'content': 0.11660884320735931, 'timestamp': '2025-10-01 04:37:43.383985', 'step': 20031, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:43.419094', 'step': 20031, 'epoch': 3} {'type': 'loss', 'content': 0.0762794241309166, 'timestamp': '2025-10-01 04:37:43.442550', 'step': 20032, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:37:43.477254', 'step': 20032, 'epoch': 3} {'type': 'loss', 'content': 0.07382909953594208, 'timestamp': '2025-10-01 04:37:43.479423', 'step': 20033, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:37:43.526237', 'step': 20033, 'epoch': 3} {'type': 'loss', 'content': 0.08556178212165833, 'timestamp': '2025-10-01 04:37:43.528602', 'step': 20034, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:37:43.562739', 'step': 20034, 'epoch': 3} {'type': 'loss', 'content': 0.08817232400178909, 'timestamp': '2025-10-01 04:37:43.564819', 'step': 20035, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:43.600470', 'step': 20035, 'epoch': 3} {'type': 'loss', 'content': 0.04560159146785736, 'timestamp': '2025-10-01 04:37:43.624212', 'step': 20036, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:37:43.658354', 'step': 20036, 'epoch': 3} {'type': 'loss', 'content': 0.08267976343631744, 'timestamp': '2025-10-01 04:37:43.660547', 'step': 20037, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:37:43.694876', 'step': 20037, 'epoch': 3} {'type': 'loss', 'content': 0.10189225524663925, 'timestamp': '2025-10-01 04:37:43.697241', 'step': 20038, 'epoch': 3} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 949202279808}], 'timestamp': '2025-10-01 04:37:55.450782', 'step': 20038, 'epoch': 3} {'type': 'pplx', 'content': 15575.15393903246, 'timestamp': '2025-10-01 04:37:55.465664', 'step': 20038, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:55.497568', 'step': 20038, 'epoch': 3} {'type': 'loss', 'content': 0.030604148283600807, 'timestamp': '2025-10-01 04:37:55.500164', 'step': 20039, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:37:55.536077', 'step': 20039, 'epoch': 3} {'type': 'loss', 'content': 0.11355128139257431, 'timestamp': '2025-10-01 04:37:55.560007', 'step': 20040, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:55.599448', 'step': 20040, 'epoch': 3} {'type': 'loss', 'content': 0.07385706156492233, 'timestamp': '2025-10-01 04:37:55.606228', 'step': 20041, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:55.654760', 'step': 20041, 'epoch': 3} {'type': 'loss', 'content': 0.055290836840867996, 'timestamp': '2025-10-01 04:37:55.656977', 'step': 20042, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:55.704537', 'step': 20042, 'epoch': 3} {'type': 'loss', 'content': 0.03916727006435394, 'timestamp': '2025-10-01 04:37:55.706790', 'step': 20043, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:37:55.762603', 'step': 20043, 'epoch': 3} {'type': 'loss', 'content': 0.0707642138004303, 'timestamp': '2025-10-01 04:37:55.786223', 'step': 20044, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:55.822596', 'step': 20044, 'epoch': 3} {'type': 'loss', 'content': 0.027188247069716454, 'timestamp': '2025-10-01 04:37:55.824852', 'step': 20045, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:37:55.869474', 'step': 20045, 'epoch': 3} {'type': 'loss', 'content': 0.08161528408527374, 'timestamp': '2025-10-01 04:37:55.872209', 'step': 20046, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:55.906211', 'step': 20046, 'epoch': 3} {'type': 'loss', 'content': 0.05000711977481842, 'timestamp': '2025-10-01 04:37:55.908508', 'step': 20047, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:37:55.944163', 'step': 20047, 'epoch': 3} {'type': 'loss', 'content': 0.05460304766893387, 'timestamp': '2025-10-01 04:37:55.968067', 'step': 20048, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:37:56.001148', 'step': 20048, 'epoch': 3} {'type': 'loss', 'content': 0.07145202159881592, 'timestamp': '2025-10-01 04:37:56.012861', 'step': 20049, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:37:56.045865', 'step': 20049, 'epoch': 3} {'type': 'loss', 'content': 0.06192491948604584, 'timestamp': '2025-10-01 04:37:56.048160', 'step': 20050, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:37:56.086234', 'step': 20050, 'epoch': 3} {'type': 'loss', 'content': 0.049000464379787445, 'timestamp': '2025-10-01 04:37:56.088368', 'step': 20051, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:37:56.121920', 'step': 20051, 'epoch': 3} {'type': 'loss', 'content': 0.04260341078042984, 'timestamp': '2025-10-01 04:37:56.145597', 'step': 20052, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:56.181725', 'step': 20052, 'epoch': 3} {'type': 'loss', 'content': 0.048500947654247284, 'timestamp': '2025-10-01 04:37:56.184011', 'step': 20053, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:56.219304', 'step': 20053, 'epoch': 3} {'type': 'loss', 'content': 0.03455691412091255, 'timestamp': '2025-10-01 04:37:56.227470', 'step': 20054, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:56.263188', 'step': 20054, 'epoch': 3} {'type': 'loss', 'content': 0.02941165678203106, 'timestamp': '2025-10-01 04:37:56.272638', 'step': 20055, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:37:56.312850', 'step': 20055, 'epoch': 3} {'type': 'loss', 'content': 0.0980275571346283, 'timestamp': '2025-10-01 04:37:56.336373', 'step': 20056, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:37:56.383179', 'step': 20056, 'epoch': 3} {'type': 'loss', 'content': 0.06426220387220383, 'timestamp': '2025-10-01 04:37:56.386498', 'step': 20057, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:56.422316', 'step': 20057, 'epoch': 3} {'type': 'loss', 'content': 0.05563772842288017, 'timestamp': '2025-10-01 04:37:56.424645', 'step': 20058, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:56.469209', 'step': 20058, 'epoch': 3} {'type': 'loss', 'content': 0.1613127738237381, 'timestamp': '2025-10-01 04:37:56.472790', 'step': 20059, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:37:56.509570', 'step': 20059, 'epoch': 3} {'type': 'loss', 'content': 0.06163951754570007, 'timestamp': '2025-10-01 04:37:56.533366', 'step': 20060, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:37:56.570792', 'step': 20060, 'epoch': 3} {'type': 'loss', 'content': 0.1664382666349411, 'timestamp': '2025-10-01 04:37:56.573476', 'step': 20061, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:56.609631', 'step': 20061, 'epoch': 3} {'type': 'loss', 'content': 0.055123183876276016, 'timestamp': '2025-10-01 04:37:56.612129', 'step': 20062, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:56.646990', 'step': 20062, 'epoch': 3} {'type': 'loss', 'content': 0.0393342487514019, 'timestamp': '2025-10-01 04:37:56.649575', 'step': 20063, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:37:56.683593', 'step': 20063, 'epoch': 3} {'type': 'loss', 'content': 0.03609445318579674, 'timestamp': '2025-10-01 04:37:56.707788', 'step': 20064, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:37:56.750599', 'step': 20064, 'epoch': 3} {'type': 'loss', 'content': 0.04861927032470703, 'timestamp': '2025-10-01 04:37:56.755998', 'step': 20065, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:56.795022', 'step': 20065, 'epoch': 3} {'type': 'loss', 'content': 0.057502228766679764, 'timestamp': '2025-10-01 04:37:56.797314', 'step': 20066, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:56.866970', 'step': 20066, 'epoch': 3} {'type': 'loss', 'content': 0.028744805604219437, 'timestamp': '2025-10-01 04:37:56.869229', 'step': 20067, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:56.907229', 'step': 20067, 'epoch': 3} {'type': 'loss', 'content': 0.07834551483392715, 'timestamp': '2025-10-01 04:37:56.935489', 'step': 20068, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:56.979898', 'step': 20068, 'epoch': 3} {'type': 'loss', 'content': 0.0424136221408844, 'timestamp': '2025-10-01 04:37:56.982300', 'step': 20069, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:37:57.034276', 'step': 20069, 'epoch': 3} {'type': 'loss', 'content': 0.07948015630245209, 'timestamp': '2025-10-01 04:37:57.036382', 'step': 20070, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:37:57.108888', 'step': 20070, 'epoch': 3} {'type': 'loss', 'content': 0.0543239451944828, 'timestamp': '2025-10-01 04:37:57.115033', 'step': 20071, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:37:57.180519', 'step': 20071, 'epoch': 3} {'type': 'loss', 'content': 0.05868348106741905, 'timestamp': '2025-10-01 04:37:57.209619', 'step': 20072, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:37:57.263798', 'step': 20072, 'epoch': 3} {'type': 'loss', 'content': 0.087595134973526, 'timestamp': '2025-10-01 04:37:57.281889', 'step': 20073, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:57.324149', 'step': 20073, 'epoch': 3} {'type': 'loss', 'content': 0.04757403954863548, 'timestamp': '2025-10-01 04:37:57.326354', 'step': 20074, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:37:57.371186', 'step': 20074, 'epoch': 3} {'type': 'loss', 'content': 0.0781998410820961, 'timestamp': '2025-10-01 04:37:57.373513', 'step': 20075, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:57.416066', 'step': 20075, 'epoch': 3} {'type': 'loss', 'content': 0.06436040252447128, 'timestamp': '2025-10-01 04:37:57.439737', 'step': 20076, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:37:57.489535', 'step': 20076, 'epoch': 3} {'type': 'loss', 'content': 0.03790658339858055, 'timestamp': '2025-10-01 04:37:57.492194', 'step': 20077, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:37:57.527596', 'step': 20077, 'epoch': 3} {'type': 'loss', 'content': 0.03762384131550789, 'timestamp': '2025-10-01 04:37:57.529969', 'step': 20078, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:37:57.562054', 'step': 20078, 'epoch': 3} {'type': 'loss', 'content': 0.07067836076021194, 'timestamp': '2025-10-01 04:37:57.565213', 'step': 20079, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:37:57.604118', 'step': 20079, 'epoch': 3} {'type': 'loss', 'content': 0.1029452309012413, 'timestamp': '2025-10-01 04:37:57.627846', 'step': 20080, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:37:57.661925', 'step': 20080, 'epoch': 3} {'type': 'loss', 'content': 0.016406521201133728, 'timestamp': '2025-10-01 04:37:57.664365', 'step': 20081, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:37:57.698481', 'step': 20081, 'epoch': 3} {'type': 'loss', 'content': 0.04141310602426529, 'timestamp': '2025-10-01 04:37:57.704185', 'step': 20082, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:37:57.738248', 'step': 20082, 'epoch': 3} {'type': 'loss', 'content': 0.09628502279520035, 'timestamp': '2025-10-01 04:37:57.740973', 'step': 20083, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:57.774543', 'step': 20083, 'epoch': 3} {'type': 'loss', 'content': 0.07086034119129181, 'timestamp': '2025-10-01 04:37:57.798405', 'step': 20084, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:37:57.843968', 'step': 20084, 'epoch': 3} {'type': 'loss', 'content': 0.08816121518611908, 'timestamp': '2025-10-01 04:37:57.846391', 'step': 20085, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:57.885446', 'step': 20085, 'epoch': 3} {'type': 'loss', 'content': 0.0598430372774601, 'timestamp': '2025-10-01 04:37:57.887915', 'step': 20086, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:37:57.920689', 'step': 20086, 'epoch': 3} {'type': 'loss', 'content': 0.1078774631023407, 'timestamp': '2025-10-01 04:37:57.923037', 'step': 20087, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:57.962628', 'step': 20087, 'epoch': 3} {'type': 'loss', 'content': 0.03111974708735943, 'timestamp': '2025-10-01 04:37:57.986619', 'step': 20088, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:37:58.018843', 'step': 20088, 'epoch': 3} {'type': 'loss', 'content': 0.1187906414270401, 'timestamp': '2025-10-01 04:37:58.021454', 'step': 20089, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:58.053802', 'step': 20089, 'epoch': 3} {'type': 'loss', 'content': 0.06584125012159348, 'timestamp': '2025-10-01 04:37:58.066187', 'step': 20090, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:37:58.108413', 'step': 20090, 'epoch': 3} {'type': 'loss', 'content': 0.10791739821434021, 'timestamp': '2025-10-01 04:37:58.110954', 'step': 20091, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:58.142610', 'step': 20091, 'epoch': 3} {'type': 'loss', 'content': 0.08549120277166367, 'timestamp': '2025-10-01 04:37:58.166805', 'step': 20092, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:58.199891', 'step': 20092, 'epoch': 3} {'type': 'loss', 'content': 0.004136544186621904, 'timestamp': '2025-10-01 04:37:58.202165', 'step': 20093, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:58.234469', 'step': 20093, 'epoch': 3} {'type': 'loss', 'content': 0.07186806201934814, 'timestamp': '2025-10-01 04:37:58.237418', 'step': 20094, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:37:58.270468', 'step': 20094, 'epoch': 3} {'type': 'loss', 'content': 0.03927575796842575, 'timestamp': '2025-10-01 04:37:58.273224', 'step': 20095, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:58.313863', 'step': 20095, 'epoch': 3} {'type': 'loss', 'content': 0.05446997657418251, 'timestamp': '2025-10-01 04:37:58.338018', 'step': 20096, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-10-01 04:37:58.373424', 'step': 20096, 'epoch': 3} {'type': 'loss', 'content': 0.04067424312233925, 'timestamp': '2025-10-01 04:37:58.375667', 'step': 20097, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:37:58.406503', 'step': 20097, 'epoch': 3} {'type': 'loss', 'content': 0.015358027070760727, 'timestamp': '2025-10-01 04:37:58.412584', 'step': 20098, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:37:58.456894', 'step': 20098, 'epoch': 3} {'type': 'loss', 'content': 0.03566572442650795, 'timestamp': '2025-10-01 04:37:58.471560', 'step': 20099, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:37:58.513036', 'step': 20099, 'epoch': 3} {'type': 'loss', 'content': 0.054399650543928146, 'timestamp': '2025-10-01 04:37:58.537160', 'step': 20100, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:58.586233', 'step': 20100, 'epoch': 3} {'type': 'loss', 'content': 0.0795082077383995, 'timestamp': '2025-10-01 04:37:58.588749', 'step': 20101, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:37:58.629463', 'step': 20101, 'epoch': 3} {'type': 'loss', 'content': 0.11458944529294968, 'timestamp': '2025-10-01 04:37:58.631572', 'step': 20102, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:58.671354', 'step': 20102, 'epoch': 3} {'type': 'loss', 'content': 0.08600623160600662, 'timestamp': '2025-10-01 04:37:58.673895', 'step': 20103, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:58.714716', 'step': 20103, 'epoch': 3} {'type': 'loss', 'content': 0.12027452886104584, 'timestamp': '2025-10-01 04:37:58.739395', 'step': 20104, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:58.779149', 'step': 20104, 'epoch': 3} {'type': 'loss', 'content': 0.060710031539201736, 'timestamp': '2025-10-01 04:37:58.781453', 'step': 20105, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:58.815848', 'step': 20105, 'epoch': 3} {'type': 'loss', 'content': 0.09667205810546875, 'timestamp': '2025-10-01 04:37:58.818018', 'step': 20106, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:58.869477', 'step': 20106, 'epoch': 3} {'type': 'loss', 'content': 0.02912067621946335, 'timestamp': '2025-10-01 04:37:58.879573', 'step': 20107, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:58.917163', 'step': 20107, 'epoch': 3} {'type': 'loss', 'content': 0.11894406378269196, 'timestamp': '2025-10-01 04:37:58.952409', 'step': 20108, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:58.988743', 'step': 20108, 'epoch': 3} {'type': 'loss', 'content': 0.04408567398786545, 'timestamp': '2025-10-01 04:37:58.992011', 'step': 20109, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:59.027799', 'step': 20109, 'epoch': 3} {'type': 'loss', 'content': 0.033669035881757736, 'timestamp': '2025-10-01 04:37:59.030172', 'step': 20110, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:59.069807', 'step': 20110, 'epoch': 3} {'type': 'loss', 'content': 0.053250618278980255, 'timestamp': '2025-10-01 04:37:59.072385', 'step': 20111, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:37:59.107437', 'step': 20111, 'epoch': 3} {'type': 'loss', 'content': 0.09691378474235535, 'timestamp': '2025-10-01 04:37:59.131436', 'step': 20112, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:37:59.164619', 'step': 20112, 'epoch': 3} {'type': 'loss', 'content': 0.05094887688755989, 'timestamp': '2025-10-01 04:37:59.167200', 'step': 20113, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:59.200367', 'step': 20113, 'epoch': 3} {'type': 'loss', 'content': 0.04069860652089119, 'timestamp': '2025-10-01 04:37:59.212563', 'step': 20114, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:37:59.246022', 'step': 20114, 'epoch': 3} {'type': 'loss', 'content': 0.03951709344983101, 'timestamp': '2025-10-01 04:37:59.249215', 'step': 20115, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:59.289140', 'step': 20115, 'epoch': 3} {'type': 'loss', 'content': 0.05059004947543144, 'timestamp': '2025-10-01 04:37:59.312674', 'step': 20116, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:37:59.344894', 'step': 20116, 'epoch': 3} {'type': 'loss', 'content': 0.14127787947654724, 'timestamp': '2025-10-01 04:37:59.347048', 'step': 20117, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:59.381150', 'step': 20117, 'epoch': 3} {'type': 'loss', 'content': 0.07537773996591568, 'timestamp': '2025-10-01 04:37:59.383408', 'step': 20118, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:37:59.419719', 'step': 20118, 'epoch': 3} {'type': 'loss', 'content': 0.04139908775687218, 'timestamp': '2025-10-01 04:37:59.422389', 'step': 20119, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:59.455313', 'step': 20119, 'epoch': 3} {'type': 'loss', 'content': 0.13932642340660095, 'timestamp': '2025-10-01 04:37:59.478792', 'step': 20120, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:59.515624', 'step': 20120, 'epoch': 3} {'type': 'loss', 'content': 0.1447032243013382, 'timestamp': '2025-10-01 04:37:59.517749', 'step': 20121, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:59.560712', 'step': 20121, 'epoch': 3} {'type': 'loss', 'content': 0.11064455658197403, 'timestamp': '2025-10-01 04:37:59.562807', 'step': 20122, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:37:59.597378', 'step': 20122, 'epoch': 3} {'type': 'loss', 'content': 0.05326689034700394, 'timestamp': '2025-10-01 04:37:59.601783', 'step': 20123, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:37:59.640691', 'step': 20123, 'epoch': 3} {'type': 'loss', 'content': 0.061612796038389206, 'timestamp': '2025-10-01 04:37:59.664545', 'step': 20124, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:59.710589', 'step': 20124, 'epoch': 3} {'type': 'loss', 'content': 0.02431498095393181, 'timestamp': '2025-10-01 04:37:59.724098', 'step': 20125, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:37:59.773533', 'step': 20125, 'epoch': 3} {'type': 'loss', 'content': 0.07564124464988708, 'timestamp': '2025-10-01 04:37:59.775905', 'step': 20126, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:37:59.822614', 'step': 20126, 'epoch': 3} {'type': 'loss', 'content': 0.05427749082446098, 'timestamp': '2025-10-01 04:37:59.825082', 'step': 20127, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:37:59.859094', 'step': 20127, 'epoch': 3} {'type': 'loss', 'content': 0.04510096460580826, 'timestamp': '2025-10-01 04:37:59.882714', 'step': 20128, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:59.932648', 'step': 20128, 'epoch': 3} {'type': 'loss', 'content': 0.08777770400047302, 'timestamp': '2025-10-01 04:37:59.935380', 'step': 20129, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:37:59.976148', 'step': 20129, 'epoch': 3} {'type': 'loss', 'content': 0.0655747652053833, 'timestamp': '2025-10-01 04:37:59.978908', 'step': 20130, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:00.015014', 'step': 20130, 'epoch': 3} {'type': 'loss', 'content': 0.039480000734329224, 'timestamp': '2025-10-01 04:38:00.017947', 'step': 20131, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:00.067837', 'step': 20131, 'epoch': 3} {'type': 'loss', 'content': 0.02549358271062374, 'timestamp': '2025-10-01 04:38:00.091435', 'step': 20132, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:00.137679', 'step': 20132, 'epoch': 3} {'type': 'loss', 'content': 0.0510500967502594, 'timestamp': '2025-10-01 04:38:00.139497', 'step': 20133, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:00.176828', 'step': 20133, 'epoch': 3} {'type': 'loss', 'content': 0.12157241255044937, 'timestamp': '2025-10-01 04:38:00.184536', 'step': 20134, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:00.232706', 'step': 20134, 'epoch': 3} {'type': 'loss', 'content': 0.07132085412740707, 'timestamp': '2025-10-01 04:38:00.243581', 'step': 20135, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:00.278774', 'step': 20135, 'epoch': 3} {'type': 'loss', 'content': 0.06982090324163437, 'timestamp': '2025-10-01 04:38:00.302443', 'step': 20136, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:00.343691', 'step': 20136, 'epoch': 3} {'type': 'loss', 'content': 0.036732982844114304, 'timestamp': '2025-10-01 04:38:00.345765', 'step': 20137, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:00.394612', 'step': 20137, 'epoch': 3} {'type': 'loss', 'content': 0.06345971673727036, 'timestamp': '2025-10-01 04:38:00.396506', 'step': 20138, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:00.432007', 'step': 20138, 'epoch': 3} {'type': 'loss', 'content': 0.03787296637892723, 'timestamp': '2025-10-01 04:38:00.434060', 'step': 20139, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:00.471011', 'step': 20139, 'epoch': 3} {'type': 'loss', 'content': 0.05115234851837158, 'timestamp': '2025-10-01 04:38:00.494642', 'step': 20140, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:38:00.540662', 'step': 20140, 'epoch': 3} {'type': 'loss', 'content': 0.11933645606040955, 'timestamp': '2025-10-01 04:38:00.542833', 'step': 20141, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:00.575223', 'step': 20141, 'epoch': 3} {'type': 'loss', 'content': 0.068385548889637, 'timestamp': '2025-10-01 04:38:00.577560', 'step': 20142, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:00.625216', 'step': 20142, 'epoch': 3} {'type': 'loss', 'content': 0.0167695302516222, 'timestamp': '2025-10-01 04:38:00.627653', 'step': 20143, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:00.661495', 'step': 20143, 'epoch': 3} {'type': 'loss', 'content': 0.10517065227031708, 'timestamp': '2025-10-01 04:38:00.685310', 'step': 20144, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:00.718540', 'step': 20144, 'epoch': 3} {'type': 'loss', 'content': 0.033237043768167496, 'timestamp': '2025-10-01 04:38:00.720942', 'step': 20145, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:00.757863', 'step': 20145, 'epoch': 3} {'type': 'loss', 'content': 0.0032936721108853817, 'timestamp': '2025-10-01 04:38:00.760346', 'step': 20146, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:00.793410', 'step': 20146, 'epoch': 3} {'type': 'loss', 'content': 0.0725105032324791, 'timestamp': '2025-10-01 04:38:00.802271', 'step': 20147, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:00.838649', 'step': 20147, 'epoch': 3} {'type': 'loss', 'content': 0.023314310237765312, 'timestamp': '2025-10-01 04:38:00.862058', 'step': 20148, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:00.895687', 'step': 20148, 'epoch': 3} {'type': 'loss', 'content': 0.1083107739686966, 'timestamp': '2025-10-01 04:38:00.897984', 'step': 20149, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:00.942061', 'step': 20149, 'epoch': 3} {'type': 'loss', 'content': 0.08678065985441208, 'timestamp': '2025-10-01 04:38:00.943978', 'step': 20150, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:00.978409', 'step': 20150, 'epoch': 3} {'type': 'loss', 'content': 0.026935437694191933, 'timestamp': '2025-10-01 04:38:00.980448', 'step': 20151, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:38:01.013377', 'step': 20151, 'epoch': 3} {'type': 'loss', 'content': 0.01353195495903492, 'timestamp': '2025-10-01 04:38:01.037015', 'step': 20152, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:01.082147', 'step': 20152, 'epoch': 3} {'type': 'loss', 'content': 0.032104603946208954, 'timestamp': '2025-10-01 04:38:01.084278', 'step': 20153, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:01.118690', 'step': 20153, 'epoch': 3} {'type': 'loss', 'content': 0.08049800992012024, 'timestamp': '2025-10-01 04:38:01.121087', 'step': 20154, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:01.162738', 'step': 20154, 'epoch': 3} {'type': 'loss', 'content': 0.002024642424657941, 'timestamp': '2025-10-01 04:38:01.164989', 'step': 20155, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:01.199535', 'step': 20155, 'epoch': 3} {'type': 'loss', 'content': 0.07413710653781891, 'timestamp': '2025-10-01 04:38:01.223128', 'step': 20156, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:01.268915', 'step': 20156, 'epoch': 3} {'type': 'loss', 'content': 0.058027926832437515, 'timestamp': '2025-10-01 04:38:01.270999', 'step': 20157, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:38:01.317156', 'step': 20157, 'epoch': 3} {'type': 'loss', 'content': 0.09338022768497467, 'timestamp': '2025-10-01 04:38:01.319092', 'step': 20158, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:01.364564', 'step': 20158, 'epoch': 3} {'type': 'loss', 'content': 0.08550801128149033, 'timestamp': '2025-10-01 04:38:01.366452', 'step': 20159, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:01.411593', 'step': 20159, 'epoch': 3} {'type': 'loss', 'content': 0.0501863919198513, 'timestamp': '2025-10-01 04:38:01.435118', 'step': 20160, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:01.480228', 'step': 20160, 'epoch': 3} {'type': 'loss', 'content': 0.039531394839286804, 'timestamp': '2025-10-01 04:38:01.482200', 'step': 20161, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:01.527438', 'step': 20161, 'epoch': 3} {'type': 'loss', 'content': 0.0710357204079628, 'timestamp': '2025-10-01 04:38:01.529508', 'step': 20162, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:01.563036', 'step': 20162, 'epoch': 3} {'type': 'loss', 'content': 0.03315495699644089, 'timestamp': '2025-10-01 04:38:01.565549', 'step': 20163, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:38:01.600411', 'step': 20163, 'epoch': 3} {'type': 'loss', 'content': 0.11500941962003708, 'timestamp': '2025-10-01 04:38:01.624183', 'step': 20164, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:01.659507', 'step': 20164, 'epoch': 3} {'type': 'loss', 'content': 0.013616594485938549, 'timestamp': '2025-10-01 04:38:01.661639', 'step': 20165, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:01.696024', 'step': 20165, 'epoch': 3} {'type': 'loss', 'content': 0.07619031518697739, 'timestamp': '2025-10-01 04:38:01.698846', 'step': 20166, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:01.744904', 'step': 20166, 'epoch': 3} {'type': 'loss', 'content': 0.07689984887838364, 'timestamp': '2025-10-01 04:38:01.747047', 'step': 20167, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:38:01.789611', 'step': 20167, 'epoch': 3} {'type': 'loss', 'content': 0.0760507732629776, 'timestamp': '2025-10-01 04:38:01.813269', 'step': 20168, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:01.849154', 'step': 20168, 'epoch': 3} {'type': 'loss', 'content': 0.014659345149993896, 'timestamp': '2025-10-01 04:38:01.851076', 'step': 20169, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:01.894219', 'step': 20169, 'epoch': 3} {'type': 'loss', 'content': 0.1171378344297409, 'timestamp': '2025-10-01 04:38:01.896904', 'step': 20170, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:01.930622', 'step': 20170, 'epoch': 3} {'type': 'loss', 'content': 0.06970452517271042, 'timestamp': '2025-10-01 04:38:01.932413', 'step': 20171, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:01.975272', 'step': 20171, 'epoch': 3} {'type': 'loss', 'content': 0.02836924046278, 'timestamp': '2025-10-01 04:38:02.000148', 'step': 20172, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:02.045034', 'step': 20172, 'epoch': 3} {'type': 'loss', 'content': 0.09661594778299332, 'timestamp': '2025-10-01 04:38:02.047099', 'step': 20173, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:02.082788', 'step': 20173, 'epoch': 3} {'type': 'loss', 'content': 0.08673045039176941, 'timestamp': '2025-10-01 04:38:02.084732', 'step': 20174, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:02.118563', 'step': 20174, 'epoch': 3} {'type': 'loss', 'content': 0.09492024034261703, 'timestamp': '2025-10-01 04:38:02.120928', 'step': 20175, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:02.167958', 'step': 20175, 'epoch': 3} {'type': 'loss', 'content': 0.04206661880016327, 'timestamp': '2025-10-01 04:38:02.191735', 'step': 20176, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:38:02.228058', 'step': 20176, 'epoch': 3} {'type': 'loss', 'content': 0.06502272188663483, 'timestamp': '2025-10-01 04:38:02.230328', 'step': 20177, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:02.283617', 'step': 20177, 'epoch': 3} {'type': 'loss', 'content': 0.06452517956495285, 'timestamp': '2025-10-01 04:38:02.285565', 'step': 20178, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:02.320504', 'step': 20178, 'epoch': 3} {'type': 'loss', 'content': 0.028769144788384438, 'timestamp': '2025-10-01 04:38:02.322412', 'step': 20179, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:02.355325', 'step': 20179, 'epoch': 3} {'type': 'loss', 'content': 0.05456280708312988, 'timestamp': '2025-10-01 04:38:02.379016', 'step': 20180, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:38:02.412660', 'step': 20180, 'epoch': 3} {'type': 'loss', 'content': 0.006189023144543171, 'timestamp': '2025-10-01 04:38:02.414571', 'step': 20181, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:02.448642', 'step': 20181, 'epoch': 3} {'type': 'loss', 'content': 0.020272450521588326, 'timestamp': '2025-10-01 04:38:02.450768', 'step': 20182, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:02.484964', 'step': 20182, 'epoch': 3} {'type': 'loss', 'content': 0.036568887531757355, 'timestamp': '2025-10-01 04:38:02.487102', 'step': 20183, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:02.519958', 'step': 20183, 'epoch': 3} {'type': 'loss', 'content': 0.008434002287685871, 'timestamp': '2025-10-01 04:38:02.543629', 'step': 20184, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:02.577557', 'step': 20184, 'epoch': 3} {'type': 'loss', 'content': 0.11017836630344391, 'timestamp': '2025-10-01 04:38:02.579745', 'step': 20185, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:02.616855', 'step': 20185, 'epoch': 3} {'type': 'loss', 'content': 0.048562370240688324, 'timestamp': '2025-10-01 04:38:02.620642', 'step': 20186, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:38:02.655950', 'step': 20186, 'epoch': 3} {'type': 'loss', 'content': 0.07070163637399673, 'timestamp': '2025-10-01 04:38:02.658319', 'step': 20187, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:02.693404', 'step': 20187, 'epoch': 3} {'type': 'loss', 'content': 0.007683951407670975, 'timestamp': '2025-10-01 04:38:02.717407', 'step': 20188, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:38:02.778038', 'step': 20188, 'epoch': 3} {'type': 'loss', 'content': 0.07033085078001022, 'timestamp': '2025-10-01 04:38:02.780250', 'step': 20189, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:02.830198', 'step': 20189, 'epoch': 3} {'type': 'loss', 'content': 0.0798233151435852, 'timestamp': '2025-10-01 04:38:02.832079', 'step': 20190, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:02.878375', 'step': 20190, 'epoch': 3} {'type': 'loss', 'content': 0.06451622396707535, 'timestamp': '2025-10-01 04:38:02.880566', 'step': 20191, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:02.915934', 'step': 20191, 'epoch': 3} {'type': 'loss', 'content': 0.02469118870794773, 'timestamp': '2025-10-01 04:38:02.939836', 'step': 20192, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:38:02.974808', 'step': 20192, 'epoch': 3} {'type': 'loss', 'content': 0.10539285838603973, 'timestamp': '2025-10-01 04:38:02.976918', 'step': 20193, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:03.011202', 'step': 20193, 'epoch': 3} {'type': 'loss', 'content': 0.13272912800312042, 'timestamp': '2025-10-01 04:38:03.013283', 'step': 20194, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:38:03.047946', 'step': 20194, 'epoch': 3} {'type': 'loss', 'content': 0.09669233113527298, 'timestamp': '2025-10-01 04:38:03.050078', 'step': 20195, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:03.084530', 'step': 20195, 'epoch': 3} {'type': 'loss', 'content': 0.04989813640713692, 'timestamp': '2025-10-01 04:38:03.108379', 'step': 20196, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:03.142863', 'step': 20196, 'epoch': 3} {'type': 'loss', 'content': 0.02414019964635372, 'timestamp': '2025-10-01 04:38:03.144973', 'step': 20197, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:03.191624', 'step': 20197, 'epoch': 3} {'type': 'loss', 'content': 0.08621068298816681, 'timestamp': '2025-10-01 04:38:03.193923', 'step': 20198, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:03.249158', 'step': 20198, 'epoch': 3} {'type': 'loss', 'content': 0.2484952211380005, 'timestamp': '2025-10-01 04:38:03.251204', 'step': 20199, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:03.284335', 'step': 20199, 'epoch': 3} {'type': 'loss', 'content': 0.06832869350910187, 'timestamp': '2025-10-01 04:38:03.307883', 'step': 20200, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:03.359434', 'step': 20200, 'epoch': 3} {'type': 'loss', 'content': 0.10111414641141891, 'timestamp': '2025-10-01 04:38:03.361505', 'step': 20201, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:38:03.396415', 'step': 20201, 'epoch': 3} {'type': 'loss', 'content': 0.10375239700078964, 'timestamp': '2025-10-01 04:38:03.398331', 'step': 20202, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:38:03.441622', 'step': 20202, 'epoch': 3} {'type': 'loss', 'content': 0.09555747359991074, 'timestamp': '2025-10-01 04:38:03.444063', 'step': 20203, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:03.496035', 'step': 20203, 'epoch': 3} {'type': 'loss', 'content': 0.07952157407999039, 'timestamp': '2025-10-01 04:38:03.519773', 'step': 20204, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:03.552737', 'step': 20204, 'epoch': 3} {'type': 'loss', 'content': 0.06237613782286644, 'timestamp': '2025-10-01 04:38:03.560829', 'step': 20205, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:38:03.599911', 'step': 20205, 'epoch': 3} {'type': 'loss', 'content': 0.1318584829568863, 'timestamp': '2025-10-01 04:38:03.602109', 'step': 20206, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:38:03.649343', 'step': 20206, 'epoch': 3} {'type': 'loss', 'content': 0.04491850361227989, 'timestamp': '2025-10-01 04:38:03.651844', 'step': 20207, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:03.688299', 'step': 20207, 'epoch': 3} {'type': 'loss', 'content': 0.06484033912420273, 'timestamp': '2025-10-01 04:38:03.715639', 'step': 20208, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:03.749805', 'step': 20208, 'epoch': 3} {'type': 'loss', 'content': 0.04932895302772522, 'timestamp': '2025-10-01 04:38:03.753016', 'step': 20209, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:03.794429', 'step': 20209, 'epoch': 3} {'type': 'loss', 'content': 0.10056754946708679, 'timestamp': '2025-10-01 04:38:03.796353', 'step': 20210, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:38:03.830340', 'step': 20210, 'epoch': 3} {'type': 'loss', 'content': 0.10620640218257904, 'timestamp': '2025-10-01 04:38:03.832283', 'step': 20211, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:03.881348', 'step': 20211, 'epoch': 3} {'type': 'loss', 'content': 0.070375956594944, 'timestamp': '2025-10-01 04:38:03.905105', 'step': 20212, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:03.948480', 'step': 20212, 'epoch': 3} {'type': 'loss', 'content': 0.17478449642658234, 'timestamp': '2025-10-01 04:38:03.950952', 'step': 20213, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:03.983583', 'step': 20213, 'epoch': 3} {'type': 'loss', 'content': 0.037286464124917984, 'timestamp': '2025-10-01 04:38:03.985725', 'step': 20214, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:04.018660', 'step': 20214, 'epoch': 3} {'type': 'loss', 'content': 0.036709271371364594, 'timestamp': '2025-10-01 04:38:04.020822', 'step': 20215, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:38:04.056401', 'step': 20215, 'epoch': 3} {'type': 'loss', 'content': 0.06217418611049652, 'timestamp': '2025-10-01 04:38:04.084533', 'step': 20216, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:04.121104', 'step': 20216, 'epoch': 3} {'type': 'loss', 'content': 0.11470481008291245, 'timestamp': '2025-10-01 04:38:04.123024', 'step': 20217, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:04.166610', 'step': 20217, 'epoch': 3} {'type': 'loss', 'content': 0.0764680877327919, 'timestamp': '2025-10-01 04:38:04.169753', 'step': 20218, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:04.219652', 'step': 20218, 'epoch': 3} {'type': 'loss', 'content': 0.0648876279592514, 'timestamp': '2025-10-01 04:38:04.221837', 'step': 20219, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:04.277953', 'step': 20219, 'epoch': 3} {'type': 'loss', 'content': 0.07459879666566849, 'timestamp': '2025-10-01 04:38:04.301526', 'step': 20220, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:04.338653', 'step': 20220, 'epoch': 3} {'type': 'loss', 'content': 0.05434059724211693, 'timestamp': '2025-10-01 04:38:04.343764', 'step': 20221, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:38:04.377265', 'step': 20221, 'epoch': 3} {'type': 'loss', 'content': 0.05076899006962776, 'timestamp': '2025-10-01 04:38:04.379451', 'step': 20222, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:04.411771', 'step': 20222, 'epoch': 3} {'type': 'loss', 'content': 0.15092524886131287, 'timestamp': '2025-10-01 04:38:04.413749', 'step': 20223, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:04.449005', 'step': 20223, 'epoch': 3} {'type': 'loss', 'content': 0.03607020527124405, 'timestamp': '2025-10-01 04:38:04.473637', 'step': 20224, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:04.507335', 'step': 20224, 'epoch': 3} {'type': 'loss', 'content': 0.022282233461737633, 'timestamp': '2025-10-01 04:38:04.509512', 'step': 20225, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:04.546692', 'step': 20225, 'epoch': 3} {'type': 'loss', 'content': 0.07866556197404861, 'timestamp': '2025-10-01 04:38:04.552886', 'step': 20226, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:04.588794', 'step': 20226, 'epoch': 3} {'type': 'loss', 'content': 0.06592337042093277, 'timestamp': '2025-10-01 04:38:04.590864', 'step': 20227, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:04.626358', 'step': 20227, 'epoch': 3} {'type': 'loss', 'content': 0.06740515679121017, 'timestamp': '2025-10-01 04:38:04.664721', 'step': 20228, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:04.697660', 'step': 20228, 'epoch': 3} {'type': 'loss', 'content': 0.06458959728479385, 'timestamp': '2025-10-01 04:38:04.700565', 'step': 20229, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:38:04.741518', 'step': 20229, 'epoch': 3} {'type': 'loss', 'content': 0.1059240847826004, 'timestamp': '2025-10-01 04:38:04.744817', 'step': 20230, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:04.777624', 'step': 20230, 'epoch': 3} {'type': 'loss', 'content': 0.04618339240550995, 'timestamp': '2025-10-01 04:38:04.780203', 'step': 20231, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:04.815051', 'step': 20231, 'epoch': 3} {'type': 'loss', 'content': 0.07647054642438889, 'timestamp': '2025-10-01 04:38:04.838910', 'step': 20232, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:04.873147', 'step': 20232, 'epoch': 3} {'type': 'loss', 'content': 0.08927185833454132, 'timestamp': '2025-10-01 04:38:04.875895', 'step': 20233, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:04.907617', 'step': 20233, 'epoch': 3} {'type': 'loss', 'content': 0.15699639916419983, 'timestamp': '2025-10-01 04:38:04.909776', 'step': 20234, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:04.944644', 'step': 20234, 'epoch': 3} {'type': 'loss', 'content': 0.07971788197755814, 'timestamp': '2025-10-01 04:38:04.946764', 'step': 20235, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:04.983076', 'step': 20235, 'epoch': 3} {'type': 'loss', 'content': 0.1031113862991333, 'timestamp': '2025-10-01 04:38:05.008596', 'step': 20236, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:38:05.047599', 'step': 20236, 'epoch': 3} {'type': 'loss', 'content': 0.020544564351439476, 'timestamp': '2025-10-01 04:38:05.049643', 'step': 20237, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:05.084719', 'step': 20237, 'epoch': 3} {'type': 'loss', 'content': 0.04728526994585991, 'timestamp': '2025-10-01 04:38:05.087067', 'step': 20238, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:05.129646', 'step': 20238, 'epoch': 3} {'type': 'loss', 'content': 0.0510096549987793, 'timestamp': '2025-10-01 04:38:05.131763', 'step': 20239, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:05.165147', 'step': 20239, 'epoch': 3} {'type': 'loss', 'content': 0.09756454825401306, 'timestamp': '2025-10-01 04:38:05.189322', 'step': 20240, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:05.223406', 'step': 20240, 'epoch': 3} {'type': 'loss', 'content': 0.06880570948123932, 'timestamp': '2025-10-01 04:38:05.225706', 'step': 20241, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:38:05.259378', 'step': 20241, 'epoch': 3} {'type': 'loss', 'content': 0.045915789902210236, 'timestamp': '2025-10-01 04:38:05.262041', 'step': 20242, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:05.308718', 'step': 20242, 'epoch': 3} {'type': 'loss', 'content': 0.08510047197341919, 'timestamp': '2025-10-01 04:38:05.311223', 'step': 20243, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:05.346472', 'step': 20243, 'epoch': 3} {'type': 'loss', 'content': 0.1529836654663086, 'timestamp': '2025-10-01 04:38:05.370504', 'step': 20244, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:05.404854', 'step': 20244, 'epoch': 3} {'type': 'loss', 'content': 0.052472345530986786, 'timestamp': '2025-10-01 04:38:05.407086', 'step': 20245, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:05.441337', 'step': 20245, 'epoch': 3} {'type': 'loss', 'content': 0.05547134578227997, 'timestamp': '2025-10-01 04:38:05.443675', 'step': 20246, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:05.480060', 'step': 20246, 'epoch': 3} {'type': 'loss', 'content': 0.019841130822896957, 'timestamp': '2025-10-01 04:38:05.482394', 'step': 20247, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:05.518345', 'step': 20247, 'epoch': 3} {'type': 'loss', 'content': 0.02859514206647873, 'timestamp': '2025-10-01 04:38:05.542293', 'step': 20248, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:05.581325', 'step': 20248, 'epoch': 3} {'type': 'loss', 'content': 0.0504789799451828, 'timestamp': '2025-10-01 04:38:05.583742', 'step': 20249, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-10-01 04:38:05.627607', 'step': 20249, 'epoch': 3} {'type': 'loss', 'content': 0.04403011500835419, 'timestamp': '2025-10-01 04:38:05.632464', 'step': 20250, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:05.667816', 'step': 20250, 'epoch': 3} {'type': 'loss', 'content': 0.08171749114990234, 'timestamp': '2025-10-01 04:38:05.670575', 'step': 20251, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:05.706213', 'step': 20251, 'epoch': 3} {'type': 'loss', 'content': 0.09086928516626358, 'timestamp': '2025-10-01 04:38:05.730051', 'step': 20252, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:05.768535', 'step': 20252, 'epoch': 3} {'type': 'loss', 'content': 0.07531444728374481, 'timestamp': '2025-10-01 04:38:05.770861', 'step': 20253, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:05.804506', 'step': 20253, 'epoch': 3} {'type': 'loss', 'content': 0.029805881902575493, 'timestamp': '2025-10-01 04:38:05.807317', 'step': 20254, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:05.849858', 'step': 20254, 'epoch': 3} {'type': 'loss', 'content': 0.12343906611204147, 'timestamp': '2025-10-01 04:38:05.852628', 'step': 20255, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:05.886302', 'step': 20255, 'epoch': 3} {'type': 'loss', 'content': 0.07993651926517487, 'timestamp': '2025-10-01 04:38:05.910621', 'step': 20256, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:05.948193', 'step': 20256, 'epoch': 3} {'type': 'loss', 'content': 0.044607240706682205, 'timestamp': '2025-10-01 04:38:05.950548', 'step': 20257, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:05.984416', 'step': 20257, 'epoch': 3} {'type': 'loss', 'content': 0.10205774009227753, 'timestamp': '2025-10-01 04:38:05.986952', 'step': 20258, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:38:06.034091', 'step': 20258, 'epoch': 3} {'type': 'loss', 'content': 0.09794977307319641, 'timestamp': '2025-10-01 04:38:06.036514', 'step': 20259, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:06.072285', 'step': 20259, 'epoch': 3} {'type': 'loss', 'content': 0.030233021825551987, 'timestamp': '2025-10-01 04:38:06.098032', 'step': 20260, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:06.143126', 'step': 20260, 'epoch': 3} {'type': 'loss', 'content': 0.021314993500709534, 'timestamp': '2025-10-01 04:38:06.145896', 'step': 20261, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:06.182302', 'step': 20261, 'epoch': 3} {'type': 'loss', 'content': 0.0016313394298776984, 'timestamp': '2025-10-01 04:38:06.185148', 'step': 20262, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:06.220706', 'step': 20262, 'epoch': 3} {'type': 'loss', 'content': 0.06398869305849075, 'timestamp': '2025-10-01 04:38:06.223147', 'step': 20263, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:06.259382', 'step': 20263, 'epoch': 3} {'type': 'loss', 'content': 0.14197231829166412, 'timestamp': '2025-10-01 04:38:06.283705', 'step': 20264, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:06.319761', 'step': 20264, 'epoch': 3} {'type': 'loss', 'content': 0.08862840384244919, 'timestamp': '2025-10-01 04:38:06.323559', 'step': 20265, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:06.358372', 'step': 20265, 'epoch': 3} {'type': 'loss', 'content': 0.07033948600292206, 'timestamp': '2025-10-01 04:38:06.361076', 'step': 20266, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:06.396745', 'step': 20266, 'epoch': 3} {'type': 'loss', 'content': 0.1002822071313858, 'timestamp': '2025-10-01 04:38:06.399088', 'step': 20267, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:06.434259', 'step': 20267, 'epoch': 3} {'type': 'loss', 'content': 0.07129324972629547, 'timestamp': '2025-10-01 04:38:06.474052', 'step': 20268, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:06.515657', 'step': 20268, 'epoch': 3} {'type': 'loss', 'content': 0.054540857672691345, 'timestamp': '2025-10-01 04:38:06.517903', 'step': 20269, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:38:06.554139', 'step': 20269, 'epoch': 3} {'type': 'loss', 'content': 0.12115925550460815, 'timestamp': '2025-10-01 04:38:06.556993', 'step': 20270, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:06.595640', 'step': 20270, 'epoch': 3} {'type': 'loss', 'content': 0.06951203942298889, 'timestamp': '2025-10-01 04:38:06.598451', 'step': 20271, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:06.635144', 'step': 20271, 'epoch': 3} {'type': 'loss', 'content': 0.06679834425449371, 'timestamp': '2025-10-01 04:38:06.659471', 'step': 20272, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:06.694601', 'step': 20272, 'epoch': 3} {'type': 'loss', 'content': 0.04621517285704613, 'timestamp': '2025-10-01 04:38:06.697529', 'step': 20273, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:38:06.730884', 'step': 20273, 'epoch': 3} {'type': 'loss', 'content': 0.06260671466588974, 'timestamp': '2025-10-01 04:38:06.733531', 'step': 20274, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:06.766385', 'step': 20274, 'epoch': 3} {'type': 'loss', 'content': 0.158832848072052, 'timestamp': '2025-10-01 04:38:06.769189', 'step': 20275, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:06.809171', 'step': 20275, 'epoch': 3} {'type': 'loss', 'content': 0.09400543570518494, 'timestamp': '2025-10-01 04:38:06.833478', 'step': 20276, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:06.872530', 'step': 20276, 'epoch': 3} {'type': 'loss', 'content': 0.0916852131485939, 'timestamp': '2025-10-01 04:38:06.875271', 'step': 20277, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:38:06.934246', 'step': 20277, 'epoch': 3} {'type': 'loss', 'content': 0.041943155229091644, 'timestamp': '2025-10-01 04:38:06.938129', 'step': 20278, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:06.971853', 'step': 20278, 'epoch': 3} {'type': 'loss', 'content': 0.035485122352838516, 'timestamp': '2025-10-01 04:38:06.974246', 'step': 20279, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:07.017631', 'step': 20279, 'epoch': 3} {'type': 'loss', 'content': 0.08772400766611099, 'timestamp': '2025-10-01 04:38:07.041253', 'step': 20280, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:07.081861', 'step': 20280, 'epoch': 3} {'type': 'loss', 'content': 0.06280924379825592, 'timestamp': '2025-10-01 04:38:07.084083', 'step': 20281, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:07.117915', 'step': 20281, 'epoch': 3} {'type': 'loss', 'content': 0.08211234956979752, 'timestamp': '2025-10-01 04:38:07.120135', 'step': 20282, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:07.152580', 'step': 20282, 'epoch': 3} {'type': 'loss', 'content': 0.07510554790496826, 'timestamp': '2025-10-01 04:38:07.154774', 'step': 20283, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:07.194288', 'step': 20283, 'epoch': 3} {'type': 'loss', 'content': 0.1023903489112854, 'timestamp': '2025-10-01 04:38:07.218047', 'step': 20284, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:38:07.251809', 'step': 20284, 'epoch': 3} {'type': 'loss', 'content': 0.03615123778581619, 'timestamp': '2025-10-01 04:38:07.254188', 'step': 20285, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:07.293596', 'step': 20285, 'epoch': 3} {'type': 'loss', 'content': 0.12981374561786652, 'timestamp': '2025-10-01 04:38:07.295901', 'step': 20286, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:07.328690', 'step': 20286, 'epoch': 3} {'type': 'loss', 'content': 0.032207246869802475, 'timestamp': '2025-10-01 04:38:07.330953', 'step': 20287, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:07.370605', 'step': 20287, 'epoch': 3} {'type': 'loss', 'content': 0.029886147007346153, 'timestamp': '2025-10-01 04:38:07.394602', 'step': 20288, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:07.435562', 'step': 20288, 'epoch': 3} {'type': 'loss', 'content': 0.10528063774108887, 'timestamp': '2025-10-01 04:38:07.437845', 'step': 20289, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:07.470406', 'step': 20289, 'epoch': 3} {'type': 'loss', 'content': 0.08841998130083084, 'timestamp': '2025-10-01 04:38:07.472767', 'step': 20290, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:38:07.519605', 'step': 20290, 'epoch': 3} {'type': 'loss', 'content': 0.08460801839828491, 'timestamp': '2025-10-01 04:38:07.521862', 'step': 20291, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:07.555167', 'step': 20291, 'epoch': 3} {'type': 'loss', 'content': 0.11945359408855438, 'timestamp': '2025-10-01 04:38:07.579585', 'step': 20292, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:38:07.613191', 'step': 20292, 'epoch': 3} {'type': 'loss', 'content': 0.05376022309064865, 'timestamp': '2025-10-01 04:38:07.617057', 'step': 20293, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:07.661836', 'step': 20293, 'epoch': 3} {'type': 'loss', 'content': 0.07447486370801926, 'timestamp': '2025-10-01 04:38:07.664498', 'step': 20294, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:07.705780', 'step': 20294, 'epoch': 3} {'type': 'loss', 'content': 0.029659537598490715, 'timestamp': '2025-10-01 04:38:07.708276', 'step': 20295, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:07.741768', 'step': 20295, 'epoch': 3} {'type': 'loss', 'content': 0.0386529341340065, 'timestamp': '2025-10-01 04:38:07.765740', 'step': 20296, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:07.811632', 'step': 20296, 'epoch': 3} {'type': 'loss', 'content': 0.04397541284561157, 'timestamp': '2025-10-01 04:38:07.813912', 'step': 20297, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:07.856727', 'step': 20297, 'epoch': 3} {'type': 'loss', 'content': 0.0466986708343029, 'timestamp': '2025-10-01 04:38:07.858987', 'step': 20298, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:07.894095', 'step': 20298, 'epoch': 3} {'type': 'loss', 'content': 0.08493861556053162, 'timestamp': '2025-10-01 04:38:07.896647', 'step': 20299, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:07.930472', 'step': 20299, 'epoch': 3} {'type': 'loss', 'content': 0.02877732180058956, 'timestamp': '2025-10-01 04:38:07.954615', 'step': 20300, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:38:07.988637', 'step': 20300, 'epoch': 3} {'type': 'loss', 'content': 0.06690054386854172, 'timestamp': '2025-10-01 04:38:07.990903', 'step': 20301, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:08.025337', 'step': 20301, 'epoch': 3} {'type': 'loss', 'content': 0.0550534725189209, 'timestamp': '2025-10-01 04:38:08.027714', 'step': 20302, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:08.074847', 'step': 20302, 'epoch': 3} {'type': 'loss', 'content': 0.033905889838933945, 'timestamp': '2025-10-01 04:38:08.077099', 'step': 20303, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:08.122713', 'step': 20303, 'epoch': 3} {'type': 'loss', 'content': 0.06736129522323608, 'timestamp': '2025-10-01 04:38:08.146617', 'step': 20304, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:08.187820', 'step': 20304, 'epoch': 3} {'type': 'loss', 'content': 0.06566638499498367, 'timestamp': '2025-10-01 04:38:08.190032', 'step': 20305, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:08.229089', 'step': 20305, 'epoch': 3} {'type': 'loss', 'content': 0.009908816777169704, 'timestamp': '2025-10-01 04:38:08.231512', 'step': 20306, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:08.267934', 'step': 20306, 'epoch': 3} {'type': 'loss', 'content': 0.05974288284778595, 'timestamp': '2025-10-01 04:38:08.270248', 'step': 20307, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:38:08.324785', 'step': 20307, 'epoch': 3} {'type': 'loss', 'content': 0.10804692655801773, 'timestamp': '2025-10-01 04:38:08.348963', 'step': 20308, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:08.384887', 'step': 20308, 'epoch': 3} {'type': 'loss', 'content': 0.0872582271695137, 'timestamp': '2025-10-01 04:38:08.387223', 'step': 20309, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:08.429655', 'step': 20309, 'epoch': 3} {'type': 'loss', 'content': 0.1282905489206314, 'timestamp': '2025-10-01 04:38:08.431811', 'step': 20310, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:08.471575', 'step': 20310, 'epoch': 3} {'type': 'loss', 'content': 0.04480729252099991, 'timestamp': '2025-10-01 04:38:08.473804', 'step': 20311, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:08.513735', 'step': 20311, 'epoch': 3} {'type': 'loss', 'content': 0.14105837047100067, 'timestamp': '2025-10-01 04:38:08.537880', 'step': 20312, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:08.578330', 'step': 20312, 'epoch': 3} {'type': 'loss', 'content': 0.0351201631128788, 'timestamp': '2025-10-01 04:38:08.582296', 'step': 20313, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:08.618503', 'step': 20313, 'epoch': 3} {'type': 'loss', 'content': 0.05632252246141434, 'timestamp': '2025-10-01 04:38:08.620880', 'step': 20314, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:08.666787', 'step': 20314, 'epoch': 3} {'type': 'loss', 'content': 0.11814138293266296, 'timestamp': '2025-10-01 04:38:08.669198', 'step': 20315, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:08.712047', 'step': 20315, 'epoch': 3} {'type': 'loss', 'content': 0.07057209312915802, 'timestamp': '2025-10-01 04:38:08.735960', 'step': 20316, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:08.781076', 'step': 20316, 'epoch': 3} {'type': 'loss', 'content': 0.07244375348091125, 'timestamp': '2025-10-01 04:38:08.783940', 'step': 20317, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:08.817037', 'step': 20317, 'epoch': 3} {'type': 'loss', 'content': 0.1304481327533722, 'timestamp': '2025-10-01 04:38:08.819069', 'step': 20318, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:08.855844', 'step': 20318, 'epoch': 3} {'type': 'loss', 'content': 0.12279239296913147, 'timestamp': '2025-10-01 04:38:08.858111', 'step': 20319, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:08.892213', 'step': 20319, 'epoch': 3} {'type': 'loss', 'content': 0.053211476653814316, 'timestamp': '2025-10-01 04:38:08.915966', 'step': 20320, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:08.951888', 'step': 20320, 'epoch': 3} {'type': 'loss', 'content': 0.04746616259217262, 'timestamp': '2025-10-01 04:38:08.954712', 'step': 20321, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:09.014890', 'step': 20321, 'epoch': 3} {'type': 'loss', 'content': 0.044995833188295364, 'timestamp': '2025-10-01 04:38:09.017093', 'step': 20322, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:38:09.052802', 'step': 20322, 'epoch': 3} {'type': 'loss', 'content': 0.07991840690374374, 'timestamp': '2025-10-01 04:38:09.055321', 'step': 20323, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:09.089932', 'step': 20323, 'epoch': 3} {'type': 'loss', 'content': 0.02526259981095791, 'timestamp': '2025-10-01 04:38:09.113621', 'step': 20324, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:09.147096', 'step': 20324, 'epoch': 3} {'type': 'loss', 'content': 0.02331738919019699, 'timestamp': '2025-10-01 04:38:09.151728', 'step': 20325, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:09.186876', 'step': 20325, 'epoch': 3} {'type': 'loss', 'content': 0.07768379151821136, 'timestamp': '2025-10-01 04:38:09.189117', 'step': 20326, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:09.231015', 'step': 20326, 'epoch': 3} {'type': 'loss', 'content': 0.049226149916648865, 'timestamp': '2025-10-01 04:38:09.241653', 'step': 20327, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:09.279182', 'step': 20327, 'epoch': 3} {'type': 'loss', 'content': 0.0495813712477684, 'timestamp': '2025-10-01 04:38:09.304817', 'step': 20328, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:09.341057', 'step': 20328, 'epoch': 3} {'type': 'loss', 'content': 0.055442214012145996, 'timestamp': '2025-10-01 04:38:09.343879', 'step': 20329, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:09.379422', 'step': 20329, 'epoch': 3} {'type': 'loss', 'content': 0.09076033532619476, 'timestamp': '2025-10-01 04:38:09.382610', 'step': 20330, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:09.415412', 'step': 20330, 'epoch': 3} {'type': 'loss', 'content': 0.07235702127218246, 'timestamp': '2025-10-01 04:38:09.418945', 'step': 20331, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:38:09.451866', 'step': 20331, 'epoch': 3} {'type': 'loss', 'content': 0.09671809524297714, 'timestamp': '2025-10-01 04:38:09.475816', 'step': 20332, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:09.509496', 'step': 20332, 'epoch': 3} {'type': 'loss', 'content': 0.03787907585501671, 'timestamp': '2025-10-01 04:38:09.512959', 'step': 20333, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:09.547795', 'step': 20333, 'epoch': 3} {'type': 'loss', 'content': 0.03596830368041992, 'timestamp': '2025-10-01 04:38:09.550102', 'step': 20334, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:09.603902', 'step': 20334, 'epoch': 3} {'type': 'loss', 'content': 0.09106123447418213, 'timestamp': '2025-10-01 04:38:09.606101', 'step': 20335, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:09.656381', 'step': 20335, 'epoch': 3} {'type': 'loss', 'content': 0.1133333370089531, 'timestamp': '2025-10-01 04:38:09.681178', 'step': 20336, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:09.716351', 'step': 20336, 'epoch': 3} {'type': 'loss', 'content': 0.044593144208192825, 'timestamp': '2025-10-01 04:38:09.719166', 'step': 20337, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:38:09.762151', 'step': 20337, 'epoch': 3} {'type': 'loss', 'content': 0.06421485543251038, 'timestamp': '2025-10-01 04:38:09.764606', 'step': 20338, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:09.811051', 'step': 20338, 'epoch': 3} {'type': 'loss', 'content': 0.022000627592206, 'timestamp': '2025-10-01 04:38:09.815361', 'step': 20339, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:09.851139', 'step': 20339, 'epoch': 3} {'type': 'loss', 'content': 0.011284512467682362, 'timestamp': '2025-10-01 04:38:09.875416', 'step': 20340, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:09.918706', 'step': 20340, 'epoch': 3} {'type': 'loss', 'content': 0.058092016726732254, 'timestamp': '2025-10-01 04:38:09.920968', 'step': 20341, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:09.970879', 'step': 20341, 'epoch': 3} {'type': 'loss', 'content': 0.11195728927850723, 'timestamp': '2025-10-01 04:38:09.973255', 'step': 20342, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:10.019338', 'step': 20342, 'epoch': 3} {'type': 'loss', 'content': 0.017438853159546852, 'timestamp': '2025-10-01 04:38:10.021717', 'step': 20343, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:10.056454', 'step': 20343, 'epoch': 3} {'type': 'loss', 'content': 0.07865484803915024, 'timestamp': '2025-10-01 04:38:10.080274', 'step': 20344, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:38:10.114029', 'step': 20344, 'epoch': 3} {'type': 'loss', 'content': 0.03513864800333977, 'timestamp': '2025-10-01 04:38:10.116381', 'step': 20345, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:10.149336', 'step': 20345, 'epoch': 3} {'type': 'loss', 'content': 0.02858646772801876, 'timestamp': '2025-10-01 04:38:10.151566', 'step': 20346, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:10.182847', 'step': 20346, 'epoch': 3} {'type': 'loss', 'content': 0.056984905153512955, 'timestamp': '2025-10-01 04:38:10.185123', 'step': 20347, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:10.219317', 'step': 20347, 'epoch': 3} {'type': 'loss', 'content': 0.12669014930725098, 'timestamp': '2025-10-01 04:38:10.244235', 'step': 20348, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:10.277459', 'step': 20348, 'epoch': 3} {'type': 'loss', 'content': 0.037612807005643845, 'timestamp': '2025-10-01 04:38:10.279683', 'step': 20349, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:10.326500', 'step': 20349, 'epoch': 3} {'type': 'loss', 'content': 0.04870765656232834, 'timestamp': '2025-10-01 04:38:10.328717', 'step': 20350, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:10.360595', 'step': 20350, 'epoch': 3} {'type': 'loss', 'content': 0.041126519441604614, 'timestamp': '2025-10-01 04:38:10.362879', 'step': 20351, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:10.397541', 'step': 20351, 'epoch': 3} {'type': 'loss', 'content': 0.07620251923799515, 'timestamp': '2025-10-01 04:38:10.421525', 'step': 20352, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:10.454349', 'step': 20352, 'epoch': 3} {'type': 'loss', 'content': 0.08132641762495041, 'timestamp': '2025-10-01 04:38:10.456521', 'step': 20353, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:10.500334', 'step': 20353, 'epoch': 3} {'type': 'loss', 'content': 0.06750594824552536, 'timestamp': '2025-10-01 04:38:10.502638', 'step': 20354, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:10.534265', 'step': 20354, 'epoch': 3} {'type': 'loss', 'content': 0.07251761108636856, 'timestamp': '2025-10-01 04:38:10.536574', 'step': 20355, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:10.570963', 'step': 20355, 'epoch': 3} {'type': 'loss', 'content': 0.07731202244758606, 'timestamp': '2025-10-01 04:38:10.594595', 'step': 20356, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:10.626715', 'step': 20356, 'epoch': 3} {'type': 'loss', 'content': 0.03755529224872589, 'timestamp': '2025-10-01 04:38:10.628882', 'step': 20357, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:10.661466', 'step': 20357, 'epoch': 3} {'type': 'loss', 'content': 0.12493003159761429, 'timestamp': '2025-10-01 04:38:10.663913', 'step': 20358, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:10.699922', 'step': 20358, 'epoch': 3} {'type': 'loss', 'content': 0.0660277009010315, 'timestamp': '2025-10-01 04:38:10.702122', 'step': 20359, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:10.735935', 'step': 20359, 'epoch': 3} {'type': 'loss', 'content': 0.051941078156232834, 'timestamp': '2025-10-01 04:38:10.759728', 'step': 20360, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:10.791021', 'step': 20360, 'epoch': 3} {'type': 'loss', 'content': 0.16830095648765564, 'timestamp': '2025-10-01 04:38:10.793216', 'step': 20361, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:10.826194', 'step': 20361, 'epoch': 3} {'type': 'loss', 'content': 0.037362631410360336, 'timestamp': '2025-10-01 04:38:10.829174', 'step': 20362, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:10.863115', 'step': 20362, 'epoch': 3} {'type': 'loss', 'content': 0.056766167283058167, 'timestamp': '2025-10-01 04:38:10.865436', 'step': 20363, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:10.902854', 'step': 20363, 'epoch': 3} {'type': 'loss', 'content': 0.06266932189464569, 'timestamp': '2025-10-01 04:38:10.926764', 'step': 20364, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:10.963181', 'step': 20364, 'epoch': 3} {'type': 'loss', 'content': 0.03350751847028732, 'timestamp': '2025-10-01 04:38:10.965507', 'step': 20365, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:11.005511', 'step': 20365, 'epoch': 3} {'type': 'loss', 'content': 0.11166111379861832, 'timestamp': '2025-10-01 04:38:11.007852', 'step': 20366, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:11.049752', 'step': 20366, 'epoch': 3} {'type': 'loss', 'content': 0.015359441749751568, 'timestamp': '2025-10-01 04:38:11.051996', 'step': 20367, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:11.092500', 'step': 20367, 'epoch': 3} {'type': 'loss', 'content': 0.04153047129511833, 'timestamp': '2025-10-01 04:38:11.116172', 'step': 20368, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:11.150609', 'step': 20368, 'epoch': 3} {'type': 'loss', 'content': 0.0799262598156929, 'timestamp': '2025-10-01 04:38:11.153279', 'step': 20369, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:38:11.186735', 'step': 20369, 'epoch': 3} {'type': 'loss', 'content': 0.026057936251163483, 'timestamp': '2025-10-01 04:38:11.189020', 'step': 20370, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:11.229071', 'step': 20370, 'epoch': 3} {'type': 'loss', 'content': 0.13454420864582062, 'timestamp': '2025-10-01 04:38:11.231326', 'step': 20371, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:11.269988', 'step': 20371, 'epoch': 3} {'type': 'loss', 'content': 0.02567112445831299, 'timestamp': '2025-10-01 04:38:11.293803', 'step': 20372, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:38:11.329179', 'step': 20372, 'epoch': 3} {'type': 'loss', 'content': 0.0519539900124073, 'timestamp': '2025-10-01 04:38:11.331483', 'step': 20373, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:11.363808', 'step': 20373, 'epoch': 3} {'type': 'loss', 'content': 0.11924004554748535, 'timestamp': '2025-10-01 04:38:11.366015', 'step': 20374, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:11.403689', 'step': 20374, 'epoch': 3} {'type': 'loss', 'content': 0.05315855145454407, 'timestamp': '2025-10-01 04:38:11.405936', 'step': 20375, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:11.444143', 'step': 20375, 'epoch': 3} {'type': 'loss', 'content': 0.07844238728284836, 'timestamp': '2025-10-01 04:38:11.467941', 'step': 20376, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:11.502979', 'step': 20376, 'epoch': 3} {'type': 'loss', 'content': 0.07583566755056381, 'timestamp': '2025-10-01 04:38:11.505190', 'step': 20377, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:11.538540', 'step': 20377, 'epoch': 3} {'type': 'loss', 'content': 0.08555646240711212, 'timestamp': '2025-10-01 04:38:11.540859', 'step': 20378, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:11.573525', 'step': 20378, 'epoch': 3} {'type': 'loss', 'content': 0.04654361307621002, 'timestamp': '2025-10-01 04:38:11.575869', 'step': 20379, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:11.612560', 'step': 20379, 'epoch': 3} {'type': 'loss', 'content': 0.08838623017072678, 'timestamp': '2025-10-01 04:38:11.636394', 'step': 20380, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:38:11.670441', 'step': 20380, 'epoch': 3} {'type': 'loss', 'content': 0.024556173011660576, 'timestamp': '2025-10-01 04:38:11.672684', 'step': 20381, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:11.710249', 'step': 20381, 'epoch': 3} {'type': 'loss', 'content': 0.03574248030781746, 'timestamp': '2025-10-01 04:38:11.712444', 'step': 20382, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:11.745348', 'step': 20382, 'epoch': 3} {'type': 'loss', 'content': 0.023969683796167374, 'timestamp': '2025-10-01 04:38:11.747577', 'step': 20383, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:11.780130', 'step': 20383, 'epoch': 3} {'type': 'loss', 'content': 0.03143370896577835, 'timestamp': '2025-10-01 04:38:11.803818', 'step': 20384, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:11.836566', 'step': 20384, 'epoch': 3} {'type': 'loss', 'content': 0.027828793972730637, 'timestamp': '2025-10-01 04:38:11.838912', 'step': 20385, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:11.872006', 'step': 20385, 'epoch': 3} {'type': 'loss', 'content': 0.0853060781955719, 'timestamp': '2025-10-01 04:38:11.874307', 'step': 20386, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:11.909688', 'step': 20386, 'epoch': 3} {'type': 'loss', 'content': 0.08088841289281845, 'timestamp': '2025-10-01 04:38:11.912098', 'step': 20387, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:11.947634', 'step': 20387, 'epoch': 3} {'type': 'loss', 'content': 0.11547614634037018, 'timestamp': '2025-10-01 04:38:11.971483', 'step': 20388, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:38:12.006907', 'step': 20388, 'epoch': 3} {'type': 'loss', 'content': 0.025914760306477547, 'timestamp': '2025-10-01 04:38:12.009559', 'step': 20389, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:12.044791', 'step': 20389, 'epoch': 3} {'type': 'loss', 'content': 0.14347071945667267, 'timestamp': '2025-10-01 04:38:12.047115', 'step': 20390, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:12.084847', 'step': 20390, 'epoch': 3} {'type': 'loss', 'content': 0.08515740185976028, 'timestamp': '2025-10-01 04:38:12.088122', 'step': 20391, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:12.127922', 'step': 20391, 'epoch': 3} {'type': 'loss', 'content': 0.026818813756108284, 'timestamp': '2025-10-01 04:38:12.151910', 'step': 20392, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:12.198471', 'step': 20392, 'epoch': 3} {'type': 'loss', 'content': 0.056594204157590866, 'timestamp': '2025-10-01 04:38:12.201755', 'step': 20393, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:12.238651', 'step': 20393, 'epoch': 3} {'type': 'loss', 'content': 0.07985991984605789, 'timestamp': '2025-10-01 04:38:12.240915', 'step': 20394, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:12.278451', 'step': 20394, 'epoch': 3} {'type': 'loss', 'content': 0.05294090509414673, 'timestamp': '2025-10-01 04:38:12.281319', 'step': 20395, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:12.318994', 'step': 20395, 'epoch': 3} {'type': 'loss', 'content': 0.036755092442035675, 'timestamp': '2025-10-01 04:38:12.348150', 'step': 20396, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:12.384202', 'step': 20396, 'epoch': 3} {'type': 'loss', 'content': 0.09700700640678406, 'timestamp': '2025-10-01 04:38:12.386537', 'step': 20397, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:12.419445', 'step': 20397, 'epoch': 3} {'type': 'loss', 'content': 0.049664583057165146, 'timestamp': '2025-10-01 04:38:12.421885', 'step': 20398, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:12.461253', 'step': 20398, 'epoch': 3} {'type': 'loss', 'content': 0.09320227056741714, 'timestamp': '2025-10-01 04:38:12.464104', 'step': 20399, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:12.498245', 'step': 20399, 'epoch': 3} {'type': 'loss', 'content': 0.0334073044359684, 'timestamp': '2025-10-01 04:38:12.522703', 'step': 20400, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:12.574312', 'step': 20400, 'epoch': 3} {'type': 'loss', 'content': 0.11448483914136887, 'timestamp': '2025-10-01 04:38:12.576670', 'step': 20401, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:12.618022', 'step': 20401, 'epoch': 3} {'type': 'loss', 'content': 0.027695024386048317, 'timestamp': '2025-10-01 04:38:12.620573', 'step': 20402, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:12.655057', 'step': 20402, 'epoch': 3} {'type': 'loss', 'content': 0.06804266571998596, 'timestamp': '2025-10-01 04:38:12.657367', 'step': 20403, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:12.693845', 'step': 20403, 'epoch': 3} {'type': 'loss', 'content': 0.04972388595342636, 'timestamp': '2025-10-01 04:38:12.721963', 'step': 20404, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:12.760145', 'step': 20404, 'epoch': 3} {'type': 'loss', 'content': 0.05315419286489487, 'timestamp': '2025-10-01 04:38:12.764559', 'step': 20405, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:12.833302', 'step': 20405, 'epoch': 3} {'type': 'loss', 'content': 0.0988827645778656, 'timestamp': '2025-10-01 04:38:12.835653', 'step': 20406, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:12.874572', 'step': 20406, 'epoch': 3} {'type': 'loss', 'content': 0.04393244534730911, 'timestamp': '2025-10-01 04:38:12.876603', 'step': 20407, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:12.932336', 'step': 20407, 'epoch': 3} {'type': 'loss', 'content': 0.05480019375681877, 'timestamp': '2025-10-01 04:38:12.956134', 'step': 20408, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:12.989357', 'step': 20408, 'epoch': 3} {'type': 'loss', 'content': 0.04318710044026375, 'timestamp': '2025-10-01 04:38:12.991496', 'step': 20409, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:13.024209', 'step': 20409, 'epoch': 3} {'type': 'loss', 'content': 0.06912779808044434, 'timestamp': '2025-10-01 04:38:13.026434', 'step': 20410, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:13.059126', 'step': 20410, 'epoch': 3} {'type': 'loss', 'content': 0.0250815749168396, 'timestamp': '2025-10-01 04:38:13.061628', 'step': 20411, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:13.100072', 'step': 20411, 'epoch': 3} {'type': 'loss', 'content': 0.06111869215965271, 'timestamp': '2025-10-01 04:38:13.123871', 'step': 20412, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:13.163591', 'step': 20412, 'epoch': 3} {'type': 'loss', 'content': 0.03465895727276802, 'timestamp': '2025-10-01 04:38:13.165921', 'step': 20413, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:38:13.199584', 'step': 20413, 'epoch': 3} {'type': 'loss', 'content': 0.04397044703364372, 'timestamp': '2025-10-01 04:38:13.202218', 'step': 20414, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:13.234840', 'step': 20414, 'epoch': 3} {'type': 'loss', 'content': 0.07577291876077652, 'timestamp': '2025-10-01 04:38:13.237884', 'step': 20415, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:13.271803', 'step': 20415, 'epoch': 3} {'type': 'loss', 'content': 0.13103298842906952, 'timestamp': '2025-10-01 04:38:13.295623', 'step': 20416, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:13.328200', 'step': 20416, 'epoch': 3} {'type': 'loss', 'content': 0.04523485526442528, 'timestamp': '2025-10-01 04:38:13.330873', 'step': 20417, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:13.374972', 'step': 20417, 'epoch': 3} {'type': 'loss', 'content': 0.024852611124515533, 'timestamp': '2025-10-01 04:38:13.378164', 'step': 20418, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:13.412428', 'step': 20418, 'epoch': 3} {'type': 'loss', 'content': 0.0670519471168518, 'timestamp': '2025-10-01 04:38:13.414889', 'step': 20419, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:38:13.447575', 'step': 20419, 'epoch': 3} {'type': 'loss', 'content': 0.11032538115978241, 'timestamp': '2025-10-01 04:38:13.471535', 'step': 20420, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:13.503739', 'step': 20420, 'epoch': 3} {'type': 'loss', 'content': 0.04392610862851143, 'timestamp': '2025-10-01 04:38:13.524101', 'step': 20421, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:13.556442', 'step': 20421, 'epoch': 3} {'type': 'loss', 'content': 0.05737489461898804, 'timestamp': '2025-10-01 04:38:13.558718', 'step': 20422, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:13.590065', 'step': 20422, 'epoch': 3} {'type': 'loss', 'content': 0.0011468966258689761, 'timestamp': '2025-10-01 04:38:13.592922', 'step': 20423, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:13.624972', 'step': 20423, 'epoch': 3} {'type': 'loss', 'content': 0.06903301179409027, 'timestamp': '2025-10-01 04:38:13.649076', 'step': 20424, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:38:13.680868', 'step': 20424, 'epoch': 3} {'type': 'loss', 'content': 0.05791955813765526, 'timestamp': '2025-10-01 04:38:13.695670', 'step': 20425, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:38:13.731616', 'step': 20425, 'epoch': 3} {'type': 'loss', 'content': 0.0884883776307106, 'timestamp': '2025-10-01 04:38:13.736035', 'step': 20426, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:13.774431', 'step': 20426, 'epoch': 3} {'type': 'loss', 'content': 0.12309402972459793, 'timestamp': '2025-10-01 04:38:13.777134', 'step': 20427, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:13.813137', 'step': 20427, 'epoch': 3} {'type': 'loss', 'content': 0.06288096308708191, 'timestamp': '2025-10-01 04:38:13.837112', 'step': 20428, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:13.873702', 'step': 20428, 'epoch': 3} {'type': 'loss', 'content': 0.05589163675904274, 'timestamp': '2025-10-01 04:38:13.880280', 'step': 20429, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:13.917116', 'step': 20429, 'epoch': 3} {'type': 'loss', 'content': 0.061008043587207794, 'timestamp': '2025-10-01 04:38:13.919895', 'step': 20430, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:13.954566', 'step': 20430, 'epoch': 3} {'type': 'loss', 'content': 0.11063534766435623, 'timestamp': '2025-10-01 04:38:13.957747', 'step': 20431, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:13.991716', 'step': 20431, 'epoch': 3} {'type': 'loss', 'content': 0.04181748256087303, 'timestamp': '2025-10-01 04:38:14.016086', 'step': 20432, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:38:14.048759', 'step': 20432, 'epoch': 3} {'type': 'loss', 'content': 0.029717043042182922, 'timestamp': '2025-10-01 04:38:14.052400', 'step': 20433, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:14.086163', 'step': 20433, 'epoch': 3} {'type': 'loss', 'content': 0.03741903230547905, 'timestamp': '2025-10-01 04:38:14.089579', 'step': 20434, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:38:14.124251', 'step': 20434, 'epoch': 3} {'type': 'loss', 'content': 0.06308502703905106, 'timestamp': '2025-10-01 04:38:14.131983', 'step': 20435, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:14.165794', 'step': 20435, 'epoch': 3} {'type': 'loss', 'content': 0.025514133274555206, 'timestamp': '2025-10-01 04:38:14.189865', 'step': 20436, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:38:14.224792', 'step': 20436, 'epoch': 3} {'type': 'loss', 'content': 0.05532760173082352, 'timestamp': '2025-10-01 04:38:14.227407', 'step': 20437, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:14.262788', 'step': 20437, 'epoch': 3} {'type': 'loss', 'content': 0.08510170131921768, 'timestamp': '2025-10-01 04:38:14.265457', 'step': 20438, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:14.298798', 'step': 20438, 'epoch': 3} {'type': 'loss', 'content': 0.05990644171833992, 'timestamp': '2025-10-01 04:38:14.301610', 'step': 20439, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:14.337032', 'step': 20439, 'epoch': 3} {'type': 'loss', 'content': 0.040096577256917953, 'timestamp': '2025-10-01 04:38:14.361235', 'step': 20440, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:14.395302', 'step': 20440, 'epoch': 3} {'type': 'loss', 'content': 0.0517050176858902, 'timestamp': '2025-10-01 04:38:14.398021', 'step': 20441, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:14.430977', 'step': 20441, 'epoch': 3} {'type': 'loss', 'content': 0.03667818754911423, 'timestamp': '2025-10-01 04:38:14.433459', 'step': 20442, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:38:14.468020', 'step': 20442, 'epoch': 3} {'type': 'loss', 'content': 0.0355759896337986, 'timestamp': '2025-10-01 04:38:14.470611', 'step': 20443, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:14.505188', 'step': 20443, 'epoch': 3} {'type': 'loss', 'content': 0.027592431753873825, 'timestamp': '2025-10-01 04:38:14.529697', 'step': 20444, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:14.561871', 'step': 20444, 'epoch': 3} {'type': 'loss', 'content': 0.02752956561744213, 'timestamp': '2025-10-01 04:38:14.564223', 'step': 20445, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-10-01 04:38:14.596051', 'step': 20445, 'epoch': 3} {'type': 'loss', 'content': 0.10153260082006454, 'timestamp': '2025-10-01 04:38:14.603067', 'step': 20446, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:38:14.634535', 'step': 20446, 'epoch': 3} {'type': 'loss', 'content': 0.08576638251543045, 'timestamp': '2025-10-01 04:38:14.638955', 'step': 20447, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:14.671687', 'step': 20447, 'epoch': 3} {'type': 'loss', 'content': 0.03987209126353264, 'timestamp': '2025-10-01 04:38:14.695558', 'step': 20448, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:14.726630', 'step': 20448, 'epoch': 3} {'type': 'loss', 'content': 0.050327397882938385, 'timestamp': '2025-10-01 04:38:14.728940', 'step': 20449, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:14.761475', 'step': 20449, 'epoch': 3} {'type': 'loss', 'content': 0.026047395542263985, 'timestamp': '2025-10-01 04:38:14.764399', 'step': 20450, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:14.796921', 'step': 20450, 'epoch': 3} {'type': 'loss', 'content': 0.05465075001120567, 'timestamp': '2025-10-01 04:38:14.800153', 'step': 20451, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:14.838801', 'step': 20451, 'epoch': 3} {'type': 'loss', 'content': 0.14235417544841766, 'timestamp': '2025-10-01 04:38:14.863037', 'step': 20452, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:14.907502', 'step': 20452, 'epoch': 3} {'type': 'loss', 'content': 0.03022504597902298, 'timestamp': '2025-10-01 04:38:14.910412', 'step': 20453, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:14.950684', 'step': 20453, 'epoch': 3} {'type': 'loss', 'content': 0.11644677817821503, 'timestamp': '2025-10-01 04:38:14.953482', 'step': 20454, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-10-01 04:38:14.985885', 'step': 20454, 'epoch': 3} {'type': 'loss', 'content': 0.11292769759893417, 'timestamp': '2025-10-01 04:38:14.990630', 'step': 20455, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:15.023001', 'step': 20455, 'epoch': 3} {'type': 'loss', 'content': 0.09977694600820541, 'timestamp': '2025-10-01 04:38:15.046848', 'step': 20456, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:15.079676', 'step': 20456, 'epoch': 3} {'type': 'loss', 'content': 0.03772220388054848, 'timestamp': '2025-10-01 04:38:15.081888', 'step': 20457, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:15.113905', 'step': 20457, 'epoch': 3} {'type': 'loss', 'content': 0.06253773719072342, 'timestamp': '2025-10-01 04:38:15.116137', 'step': 20458, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:38:15.149410', 'step': 20458, 'epoch': 3} {'type': 'loss', 'content': 0.10279413312673569, 'timestamp': '2025-10-01 04:38:15.152497', 'step': 20459, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:38:15.192849', 'step': 20459, 'epoch': 3} {'type': 'loss', 'content': 0.052352290600538254, 'timestamp': '2025-10-01 04:38:15.216668', 'step': 20460, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:38:15.250455', 'step': 20460, 'epoch': 3} {'type': 'loss', 'content': 0.02397688664495945, 'timestamp': '2025-10-01 04:38:15.252887', 'step': 20461, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:15.292077', 'step': 20461, 'epoch': 3} {'type': 'loss', 'content': 0.0548439584672451, 'timestamp': '2025-10-01 04:38:15.295029', 'step': 20462, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:15.332029', 'step': 20462, 'epoch': 3} {'type': 'loss', 'content': 0.07484649866819382, 'timestamp': '2025-10-01 04:38:15.334165', 'step': 20463, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:15.373797', 'step': 20463, 'epoch': 3} {'type': 'loss', 'content': 0.05289389565587044, 'timestamp': '2025-10-01 04:38:15.397642', 'step': 20464, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:15.430119', 'step': 20464, 'epoch': 3} {'type': 'loss', 'content': 0.03563229739665985, 'timestamp': '2025-10-01 04:38:15.432297', 'step': 20465, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:15.464803', 'step': 20465, 'epoch': 3} {'type': 'loss', 'content': 0.051579639315605164, 'timestamp': '2025-10-01 04:38:15.467007', 'step': 20466, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:15.506423', 'step': 20466, 'epoch': 3} {'type': 'loss', 'content': 0.020037520676851273, 'timestamp': '2025-10-01 04:38:15.508772', 'step': 20467, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:15.541164', 'step': 20467, 'epoch': 3} {'type': 'loss', 'content': 0.06558879464864731, 'timestamp': '2025-10-01 04:38:15.564912', 'step': 20468, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:15.597190', 'step': 20468, 'epoch': 3} {'type': 'loss', 'content': 0.06958221644163132, 'timestamp': '2025-10-01 04:38:15.599516', 'step': 20469, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:15.630942', 'step': 20469, 'epoch': 3} {'type': 'loss', 'content': 0.053301408886909485, 'timestamp': '2025-10-01 04:38:15.633996', 'step': 20470, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:15.665630', 'step': 20470, 'epoch': 3} {'type': 'loss', 'content': 0.11853604018688202, 'timestamp': '2025-10-01 04:38:15.667808', 'step': 20471, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:15.700201', 'step': 20471, 'epoch': 3} {'type': 'loss', 'content': 0.08336728811264038, 'timestamp': '2025-10-01 04:38:15.723990', 'step': 20472, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:15.767162', 'step': 20472, 'epoch': 3} {'type': 'loss', 'content': 0.09465077519416809, 'timestamp': '2025-10-01 04:38:15.770797', 'step': 20473, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:15.809236', 'step': 20473, 'epoch': 3} {'type': 'loss', 'content': 0.05533432215452194, 'timestamp': '2025-10-01 04:38:15.811758', 'step': 20474, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:15.844053', 'step': 20474, 'epoch': 3} {'type': 'loss', 'content': 0.04980865493416786, 'timestamp': '2025-10-01 04:38:15.846326', 'step': 20475, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:15.879004', 'step': 20475, 'epoch': 3} {'type': 'loss', 'content': 0.03665293753147125, 'timestamp': '2025-10-01 04:38:15.902791', 'step': 20476, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:15.940617', 'step': 20476, 'epoch': 3} {'type': 'loss', 'content': 0.05339377373456955, 'timestamp': '2025-10-01 04:38:15.942825', 'step': 20477, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:38:15.974424', 'step': 20477, 'epoch': 3} {'type': 'loss', 'content': 0.05200076848268509, 'timestamp': '2025-10-01 04:38:15.976783', 'step': 20478, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:16.009626', 'step': 20478, 'epoch': 3} {'type': 'loss', 'content': 0.05428095906972885, 'timestamp': '2025-10-01 04:38:16.011997', 'step': 20479, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:16.043607', 'step': 20479, 'epoch': 3} {'type': 'loss', 'content': 0.054005883634090424, 'timestamp': '2025-10-01 04:38:16.067508', 'step': 20480, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:16.100916', 'step': 20480, 'epoch': 3} {'type': 'loss', 'content': 0.06929999589920044, 'timestamp': '2025-10-01 04:38:16.103143', 'step': 20481, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:16.134872', 'step': 20481, 'epoch': 3} {'type': 'loss', 'content': 0.01513165794312954, 'timestamp': '2025-10-01 04:38:16.137068', 'step': 20482, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:38:16.172202', 'step': 20482, 'epoch': 3} {'type': 'loss', 'content': 0.1119319424033165, 'timestamp': '2025-10-01 04:38:16.176550', 'step': 20483, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-10-01 04:38:16.213382', 'step': 20483, 'epoch': 3} {'type': 'loss', 'content': 0.0537920817732811, 'timestamp': '2025-10-01 04:38:16.239008', 'step': 20484, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:38:16.272104', 'step': 20484, 'epoch': 3} {'type': 'loss', 'content': 0.07755613327026367, 'timestamp': '2025-10-01 04:38:16.274634', 'step': 20485, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:16.319000', 'step': 20485, 'epoch': 3} {'type': 'loss', 'content': 0.15632420778274536, 'timestamp': '2025-10-01 04:38:16.321225', 'step': 20486, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:16.353773', 'step': 20486, 'epoch': 3} {'type': 'loss', 'content': 0.03310409560799599, 'timestamp': '2025-10-01 04:38:16.356015', 'step': 20487, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:38:16.388678', 'step': 20487, 'epoch': 3} {'type': 'loss', 'content': 0.0313204862177372, 'timestamp': '2025-10-01 04:38:16.412374', 'step': 20488, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:16.445604', 'step': 20488, 'epoch': 3} {'type': 'loss', 'content': 0.05275028571486473, 'timestamp': '2025-10-01 04:38:16.447846', 'step': 20489, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:16.485378', 'step': 20489, 'epoch': 3} {'type': 'loss', 'content': 0.06440882384777069, 'timestamp': '2025-10-01 04:38:16.487924', 'step': 20490, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:16.522552', 'step': 20490, 'epoch': 3} {'type': 'loss', 'content': 0.026593508198857307, 'timestamp': '2025-10-01 04:38:16.524765', 'step': 20491, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:38:16.562958', 'step': 20491, 'epoch': 3} {'type': 'loss', 'content': 0.04080776870250702, 'timestamp': '2025-10-01 04:38:16.586963', 'step': 20492, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:16.619180', 'step': 20492, 'epoch': 3} {'type': 'loss', 'content': 0.005502424668520689, 'timestamp': '2025-10-01 04:38:16.621603', 'step': 20493, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:16.655557', 'step': 20493, 'epoch': 3} {'type': 'loss', 'content': 0.05463799089193344, 'timestamp': '2025-10-01 04:38:16.658014', 'step': 20494, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:38:16.691458', 'step': 20494, 'epoch': 3} {'type': 'loss', 'content': 0.04035605862736702, 'timestamp': '2025-10-01 04:38:16.695867', 'step': 20495, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:16.739612', 'step': 20495, 'epoch': 3} {'type': 'loss', 'content': 0.013838238082826138, 'timestamp': '2025-10-01 04:38:16.763341', 'step': 20496, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:16.796812', 'step': 20496, 'epoch': 3} {'type': 'loss', 'content': 0.0768875777721405, 'timestamp': '2025-10-01 04:38:16.798988', 'step': 20497, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:16.831551', 'step': 20497, 'epoch': 3} {'type': 'loss', 'content': 0.024825844913721085, 'timestamp': '2025-10-01 04:38:16.833759', 'step': 20498, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:16.865583', 'step': 20498, 'epoch': 3} {'type': 'loss', 'content': 0.07182595133781433, 'timestamp': '2025-10-01 04:38:16.868023', 'step': 20499, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:38:16.900994', 'step': 20499, 'epoch': 3} {'type': 'loss', 'content': 0.11437062174081802, 'timestamp': '2025-10-01 04:38:16.924709', 'step': 20500, 'epoch': 3} {'type': 'info', 'content': 'Checkpoint saved at step 20500', 'timestamp': '2025-10-01 04:38:21.875997', 'step': 20500, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:21.909069', 'step': 20500, 'epoch': 3} {'type': 'loss', 'content': 0.018306128680706024, 'timestamp': '2025-10-01 04:38:21.911868', 'step': 20501, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:21.943872', 'step': 20501, 'epoch': 3} {'type': 'loss', 'content': 0.08423464745283127, 'timestamp': '2025-10-01 04:38:21.946088', 'step': 20502, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:21.979186', 'step': 20502, 'epoch': 3} {'type': 'loss', 'content': 0.05174139887094498, 'timestamp': '2025-10-01 04:38:21.981500', 'step': 20503, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:22.024208', 'step': 20503, 'epoch': 3} {'type': 'loss', 'content': 0.030993549153208733, 'timestamp': '2025-10-01 04:38:22.048044', 'step': 20504, 'epoch': 3} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 949202279808}], 'timestamp': '2025-10-01 04:38:31.465220', 'step': 20504, 'epoch': 3} {'type': 'pplx', 'content': 9855.774916664503, 'timestamp': '2025-10-01 04:38:31.468494', 'step': 20504, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:31.503525', 'step': 20504, 'epoch': 3} {'type': 'loss', 'content': 0.12398505210876465, 'timestamp': '2025-10-01 04:38:31.506086', 'step': 20505, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:31.538701', 'step': 20505, 'epoch': 3} {'type': 'loss', 'content': 0.015615280717611313, 'timestamp': '2025-10-01 04:38:31.541370', 'step': 20506, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:38:31.582994', 'step': 20506, 'epoch': 3} {'type': 'loss', 'content': 0.025033896788954735, 'timestamp': '2025-10-01 04:38:31.585569', 'step': 20507, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:31.625192', 'step': 20507, 'epoch': 3} {'type': 'loss', 'content': 0.047373104840517044, 'timestamp': '2025-10-01 04:38:31.648938', 'step': 20508, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:31.681177', 'step': 20508, 'epoch': 3} {'type': 'loss', 'content': 0.0950954332947731, 'timestamp': '2025-10-01 04:38:31.683822', 'step': 20509, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:31.721891', 'step': 20509, 'epoch': 3} {'type': 'loss', 'content': 0.03747532144188881, 'timestamp': '2025-10-01 04:38:31.724220', 'step': 20510, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:31.764138', 'step': 20510, 'epoch': 3} {'type': 'loss', 'content': 0.059745028614997864, 'timestamp': '2025-10-01 04:38:31.766580', 'step': 20511, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:38:31.813693', 'step': 20511, 'epoch': 3} {'type': 'loss', 'content': 0.05633611977100372, 'timestamp': '2025-10-01 04:38:31.837570', 'step': 20512, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:31.870520', 'step': 20512, 'epoch': 3} {'type': 'loss', 'content': 0.06909748166799545, 'timestamp': '2025-10-01 04:38:31.872884', 'step': 20513, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:31.909787', 'step': 20513, 'epoch': 3} {'type': 'loss', 'content': 0.06784066557884216, 'timestamp': '2025-10-01 04:38:31.912845', 'step': 20514, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:31.948214', 'step': 20514, 'epoch': 3} {'type': 'loss', 'content': 0.04857844486832619, 'timestamp': '2025-10-01 04:38:31.950691', 'step': 20515, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:38:31.987607', 'step': 20515, 'epoch': 3} {'type': 'loss', 'content': 0.059447988867759705, 'timestamp': '2025-10-01 04:38:32.011461', 'step': 20516, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:32.045149', 'step': 20516, 'epoch': 3} {'type': 'loss', 'content': 0.10204619914293289, 'timestamp': '2025-10-01 04:38:32.047393', 'step': 20517, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:32.091611', 'step': 20517, 'epoch': 3} {'type': 'loss', 'content': 0.0396268367767334, 'timestamp': '2025-10-01 04:38:32.094204', 'step': 20518, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:32.131076', 'step': 20518, 'epoch': 3} {'type': 'loss', 'content': 0.09675759077072144, 'timestamp': '2025-10-01 04:38:32.133285', 'step': 20519, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:32.165660', 'step': 20519, 'epoch': 3} {'type': 'loss', 'content': 0.05572722852230072, 'timestamp': '2025-10-01 04:38:32.189252', 'step': 20520, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:32.222778', 'step': 20520, 'epoch': 3} {'type': 'loss', 'content': 0.05470908805727959, 'timestamp': '2025-10-01 04:38:32.224977', 'step': 20521, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:32.262067', 'step': 20521, 'epoch': 3} {'type': 'loss', 'content': 0.0703452080488205, 'timestamp': '2025-10-01 04:38:32.264177', 'step': 20522, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:32.301907', 'step': 20522, 'epoch': 3} {'type': 'loss', 'content': 0.08435051143169403, 'timestamp': '2025-10-01 04:38:32.304553', 'step': 20523, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:32.336832', 'step': 20523, 'epoch': 3} {'type': 'loss', 'content': 0.04422618821263313, 'timestamp': '2025-10-01 04:38:32.360914', 'step': 20524, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:32.396875', 'step': 20524, 'epoch': 3} {'type': 'loss', 'content': 0.08957011997699738, 'timestamp': '2025-10-01 04:38:32.399285', 'step': 20525, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:32.435805', 'step': 20525, 'epoch': 3} {'type': 'loss', 'content': 0.030703511089086533, 'timestamp': '2025-10-01 04:38:32.438216', 'step': 20526, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:38:32.469795', 'step': 20526, 'epoch': 3} {'type': 'loss', 'content': 0.09730471670627594, 'timestamp': '2025-10-01 04:38:32.472187', 'step': 20527, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:32.508257', 'step': 20527, 'epoch': 3} {'type': 'loss', 'content': 0.07026788592338562, 'timestamp': '2025-10-01 04:38:32.531786', 'step': 20528, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:32.564683', 'step': 20528, 'epoch': 3} {'type': 'loss', 'content': 0.047795433551073074, 'timestamp': '2025-10-01 04:38:32.566692', 'step': 20529, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:38:32.598653', 'step': 20529, 'epoch': 3} {'type': 'loss', 'content': 0.02204250358045101, 'timestamp': '2025-10-01 04:38:32.601163', 'step': 20530, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:32.643218', 'step': 20530, 'epoch': 3} {'type': 'loss', 'content': 0.05567004531621933, 'timestamp': '2025-10-01 04:38:32.646310', 'step': 20531, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:38:32.680468', 'step': 20531, 'epoch': 3} {'type': 'loss', 'content': 0.04463154822587967, 'timestamp': '2025-10-01 04:38:32.704683', 'step': 20532, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:32.735687', 'step': 20532, 'epoch': 3} {'type': 'loss', 'content': 0.07131919264793396, 'timestamp': '2025-10-01 04:38:32.737756', 'step': 20533, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:32.767726', 'step': 20533, 'epoch': 3} {'type': 'loss', 'content': 0.025523187592625618, 'timestamp': '2025-10-01 04:38:32.769708', 'step': 20534, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:32.812517', 'step': 20534, 'epoch': 3} {'type': 'loss', 'content': 0.07241753488779068, 'timestamp': '2025-10-01 04:38:32.814519', 'step': 20535, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:32.848022', 'step': 20535, 'epoch': 3} {'type': 'loss', 'content': 0.04691407084465027, 'timestamp': '2025-10-01 04:38:32.871868', 'step': 20536, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:32.906630', 'step': 20536, 'epoch': 3} {'type': 'loss', 'content': 0.05517693981528282, 'timestamp': '2025-10-01 04:38:32.908879', 'step': 20537, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:32.950822', 'step': 20537, 'epoch': 3} {'type': 'loss', 'content': 0.08368604630231857, 'timestamp': '2025-10-01 04:38:32.953040', 'step': 20538, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:38:32.984395', 'step': 20538, 'epoch': 3} {'type': 'loss', 'content': 0.039354871958494186, 'timestamp': '2025-10-01 04:38:32.987157', 'step': 20539, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:33.024340', 'step': 20539, 'epoch': 3} {'type': 'loss', 'content': 0.06903379410505295, 'timestamp': '2025-10-01 04:38:33.048334', 'step': 20540, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:33.087687', 'step': 20540, 'epoch': 3} {'type': 'loss', 'content': 0.10087642818689346, 'timestamp': '2025-10-01 04:38:33.089726', 'step': 20541, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:38:33.121524', 'step': 20541, 'epoch': 3} {'type': 'loss', 'content': 0.053663987666368484, 'timestamp': '2025-10-01 04:38:33.123791', 'step': 20542, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:33.168104', 'step': 20542, 'epoch': 3} {'type': 'loss', 'content': 0.12172418087720871, 'timestamp': '2025-10-01 04:38:33.170319', 'step': 20543, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:33.202767', 'step': 20543, 'epoch': 3} {'type': 'loss', 'content': 0.07507211714982986, 'timestamp': '2025-10-01 04:38:33.226353', 'step': 20544, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:33.257593', 'step': 20544, 'epoch': 3} {'type': 'loss', 'content': 0.081029511988163, 'timestamp': '2025-10-01 04:38:33.259682', 'step': 20545, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:33.290873', 'step': 20545, 'epoch': 3} {'type': 'loss', 'content': 0.109218068420887, 'timestamp': '2025-10-01 04:38:33.293558', 'step': 20546, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:33.329289', 'step': 20546, 'epoch': 3} {'type': 'loss', 'content': 0.1341610848903656, 'timestamp': '2025-10-01 04:38:33.331296', 'step': 20547, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:33.362476', 'step': 20547, 'epoch': 3} {'type': 'loss', 'content': 0.08752915263175964, 'timestamp': '2025-10-01 04:38:33.386094', 'step': 20548, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:33.429508', 'step': 20548, 'epoch': 3} {'type': 'loss', 'content': 0.04920687526464462, 'timestamp': '2025-10-01 04:38:33.431501', 'step': 20549, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:33.463394', 'step': 20549, 'epoch': 3} {'type': 'loss', 'content': 0.08670312166213989, 'timestamp': '2025-10-01 04:38:33.465369', 'step': 20550, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:33.497538', 'step': 20550, 'epoch': 3} {'type': 'loss', 'content': 0.116264708340168, 'timestamp': '2025-10-01 04:38:33.499579', 'step': 20551, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:33.536297', 'step': 20551, 'epoch': 3} {'type': 'loss', 'content': 0.04532564803957939, 'timestamp': '2025-10-01 04:38:33.560028', 'step': 20552, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:33.596961', 'step': 20552, 'epoch': 3} {'type': 'loss', 'content': 0.0856475904583931, 'timestamp': '2025-10-01 04:38:33.605765', 'step': 20553, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:33.654866', 'step': 20553, 'epoch': 3} {'type': 'loss', 'content': 0.05824068561196327, 'timestamp': '2025-10-01 04:38:33.657205', 'step': 20554, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:33.694581', 'step': 20554, 'epoch': 3} {'type': 'loss', 'content': 0.15564294159412384, 'timestamp': '2025-10-01 04:38:33.696591', 'step': 20555, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:33.734730', 'step': 20555, 'epoch': 3} {'type': 'loss', 'content': 0.0663129985332489, 'timestamp': '2025-10-01 04:38:33.758516', 'step': 20556, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:33.793529', 'step': 20556, 'epoch': 3} {'type': 'loss', 'content': 0.06855956465005875, 'timestamp': '2025-10-01 04:38:33.795590', 'step': 20557, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:38:33.835658', 'step': 20557, 'epoch': 3} {'type': 'loss', 'content': 0.05794205889105797, 'timestamp': '2025-10-01 04:38:33.840112', 'step': 20558, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:33.871454', 'step': 20558, 'epoch': 3} {'type': 'loss', 'content': 0.10553630441427231, 'timestamp': '2025-10-01 04:38:33.873426', 'step': 20559, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:38:33.906308', 'step': 20559, 'epoch': 3} {'type': 'loss', 'content': 0.09024617820978165, 'timestamp': '2025-10-01 04:38:33.929746', 'step': 20560, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:33.968376', 'step': 20560, 'epoch': 3} {'type': 'loss', 'content': 0.02960803546011448, 'timestamp': '2025-10-01 04:38:33.970533', 'step': 20561, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:38:34.010983', 'step': 20561, 'epoch': 3} {'type': 'loss', 'content': 0.19792383909225464, 'timestamp': '2025-10-01 04:38:34.015166', 'step': 20562, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:38:34.054630', 'step': 20562, 'epoch': 3} {'type': 'loss', 'content': 0.07627883553504944, 'timestamp': '2025-10-01 04:38:34.056667', 'step': 20563, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:34.088769', 'step': 20563, 'epoch': 3} {'type': 'loss', 'content': 0.12638135254383087, 'timestamp': '2025-10-01 04:38:34.112453', 'step': 20564, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:34.149564', 'step': 20564, 'epoch': 3} {'type': 'loss', 'content': 0.07846661657094955, 'timestamp': '2025-10-01 04:38:34.151570', 'step': 20565, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:34.186154', 'step': 20565, 'epoch': 3} {'type': 'loss', 'content': 0.0699409544467926, 'timestamp': '2025-10-01 04:38:34.188643', 'step': 20566, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:34.220514', 'step': 20566, 'epoch': 3} {'type': 'loss', 'content': 0.0367814265191555, 'timestamp': '2025-10-01 04:38:34.222625', 'step': 20567, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:38:34.256056', 'step': 20567, 'epoch': 3} {'type': 'loss', 'content': 0.10780077427625656, 'timestamp': '2025-10-01 04:38:34.281386', 'step': 20568, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:34.316712', 'step': 20568, 'epoch': 3} {'type': 'loss', 'content': 0.08084286749362946, 'timestamp': '2025-10-01 04:38:34.320006', 'step': 20569, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:34.353103', 'step': 20569, 'epoch': 3} {'type': 'loss', 'content': 0.06403189152479172, 'timestamp': '2025-10-01 04:38:34.355166', 'step': 20570, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:34.385653', 'step': 20570, 'epoch': 3} {'type': 'loss', 'content': 0.07444491237401962, 'timestamp': '2025-10-01 04:38:34.387673', 'step': 20571, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:34.421900', 'step': 20571, 'epoch': 3} {'type': 'loss', 'content': 0.01648516207933426, 'timestamp': '2025-10-01 04:38:34.445548', 'step': 20572, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:34.478090', 'step': 20572, 'epoch': 3} {'type': 'loss', 'content': 0.04096468910574913, 'timestamp': '2025-10-01 04:38:34.480161', 'step': 20573, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:38:34.511072', 'step': 20573, 'epoch': 3} {'type': 'loss', 'content': 0.11416161805391312, 'timestamp': '2025-10-01 04:38:34.513303', 'step': 20574, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:34.545812', 'step': 20574, 'epoch': 3} {'type': 'loss', 'content': 0.0908762514591217, 'timestamp': '2025-10-01 04:38:34.547928', 'step': 20575, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:34.589503', 'step': 20575, 'epoch': 3} {'type': 'loss', 'content': 0.13538697361946106, 'timestamp': '2025-10-01 04:38:34.613057', 'step': 20576, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:34.643266', 'step': 20576, 'epoch': 3} {'type': 'loss', 'content': 0.051068078726530075, 'timestamp': '2025-10-01 04:38:34.645282', 'step': 20577, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:34.684850', 'step': 20577, 'epoch': 3} {'type': 'loss', 'content': 0.12150473147630692, 'timestamp': '2025-10-01 04:38:34.695168', 'step': 20578, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:34.731589', 'step': 20578, 'epoch': 3} {'type': 'loss', 'content': 0.04884997755289078, 'timestamp': '2025-10-01 04:38:34.733578', 'step': 20579, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:34.770767', 'step': 20579, 'epoch': 3} {'type': 'loss', 'content': 0.09072595089673996, 'timestamp': '2025-10-01 04:38:34.794397', 'step': 20580, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:34.826715', 'step': 20580, 'epoch': 3} {'type': 'loss', 'content': 0.07637958228588104, 'timestamp': '2025-10-01 04:38:34.828883', 'step': 20581, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:34.860125', 'step': 20581, 'epoch': 3} {'type': 'loss', 'content': 0.06062973663210869, 'timestamp': '2025-10-01 04:38:34.862377', 'step': 20582, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:34.892470', 'step': 20582, 'epoch': 3} {'type': 'loss', 'content': 0.046502042561769485, 'timestamp': '2025-10-01 04:38:34.894535', 'step': 20583, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:34.924506', 'step': 20583, 'epoch': 3} {'type': 'loss', 'content': 0.035565782338380814, 'timestamp': '2025-10-01 04:38:34.947975', 'step': 20584, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:34.982561', 'step': 20584, 'epoch': 3} {'type': 'loss', 'content': 0.051972899585962296, 'timestamp': '2025-10-01 04:38:34.984864', 'step': 20585, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:38:35.020337', 'step': 20585, 'epoch': 3} {'type': 'loss', 'content': 0.016626400873064995, 'timestamp': '2025-10-01 04:38:35.022340', 'step': 20586, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:35.058956', 'step': 20586, 'epoch': 3} {'type': 'loss', 'content': 0.0581318698823452, 'timestamp': '2025-10-01 04:38:35.061045', 'step': 20587, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:35.095185', 'step': 20587, 'epoch': 3} {'type': 'loss', 'content': 0.08460897952318192, 'timestamp': '2025-10-01 04:38:35.118688', 'step': 20588, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:38:35.149390', 'step': 20588, 'epoch': 3} {'type': 'loss', 'content': 0.030567090958356857, 'timestamp': '2025-10-01 04:38:35.151597', 'step': 20589, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:35.187471', 'step': 20589, 'epoch': 3} {'type': 'loss', 'content': 0.022402344271540642, 'timestamp': '2025-10-01 04:38:35.190056', 'step': 20590, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:35.226674', 'step': 20590, 'epoch': 3} {'type': 'loss', 'content': 0.05748309940099716, 'timestamp': '2025-10-01 04:38:35.228658', 'step': 20591, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:35.261335', 'step': 20591, 'epoch': 3} {'type': 'loss', 'content': 0.07514308393001556, 'timestamp': '2025-10-01 04:38:35.285081', 'step': 20592, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:38:35.317363', 'step': 20592, 'epoch': 3} {'type': 'loss', 'content': 0.11402345448732376, 'timestamp': '2025-10-01 04:38:35.319484', 'step': 20593, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:35.349601', 'step': 20593, 'epoch': 3} {'type': 'loss', 'content': 0.0446590818464756, 'timestamp': '2025-10-01 04:38:35.351826', 'step': 20594, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:35.382266', 'step': 20594, 'epoch': 3} {'type': 'loss', 'content': 0.046364009380340576, 'timestamp': '2025-10-01 04:38:35.384302', 'step': 20595, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:35.417226', 'step': 20595, 'epoch': 3} {'type': 'loss', 'content': 0.06961788982152939, 'timestamp': '2025-10-01 04:38:35.441837', 'step': 20596, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:35.474041', 'step': 20596, 'epoch': 3} {'type': 'loss', 'content': 0.05192802846431732, 'timestamp': '2025-10-01 04:38:35.476159', 'step': 20597, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:35.507353', 'step': 20597, 'epoch': 3} {'type': 'loss', 'content': 0.03963514417409897, 'timestamp': '2025-10-01 04:38:35.516669', 'step': 20598, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:35.558965', 'step': 20598, 'epoch': 3} {'type': 'loss', 'content': 0.04713350161910057, 'timestamp': '2025-10-01 04:38:35.561083', 'step': 20599, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:35.592673', 'step': 20599, 'epoch': 3} {'type': 'loss', 'content': 0.05130812153220177, 'timestamp': '2025-10-01 04:38:35.616186', 'step': 20600, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:35.646356', 'step': 20600, 'epoch': 3} {'type': 'loss', 'content': 0.0815936028957367, 'timestamp': '2025-10-01 04:38:35.648627', 'step': 20601, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:35.682645', 'step': 20601, 'epoch': 3} {'type': 'loss', 'content': 0.09262250363826752, 'timestamp': '2025-10-01 04:38:35.684852', 'step': 20602, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:38:35.716004', 'step': 20602, 'epoch': 3} {'type': 'loss', 'content': 0.04485155642032623, 'timestamp': '2025-10-01 04:38:35.718036', 'step': 20603, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:35.749251', 'step': 20603, 'epoch': 3} {'type': 'loss', 'content': 0.11340063810348511, 'timestamp': '2025-10-01 04:38:35.772755', 'step': 20604, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:35.803353', 'step': 20604, 'epoch': 3} {'type': 'loss', 'content': 0.039750803261995316, 'timestamp': '2025-10-01 04:38:35.816009', 'step': 20605, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:35.845999', 'step': 20605, 'epoch': 3} {'type': 'loss', 'content': 0.11222188174724579, 'timestamp': '2025-10-01 04:38:35.848094', 'step': 20606, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:35.878005', 'step': 20606, 'epoch': 3} {'type': 'loss', 'content': 0.0791030302643776, 'timestamp': '2025-10-01 04:38:35.880101', 'step': 20607, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:35.912529', 'step': 20607, 'epoch': 3} {'type': 'loss', 'content': 0.0700235441327095, 'timestamp': '2025-10-01 04:38:35.936223', 'step': 20608, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:38:35.966103', 'step': 20608, 'epoch': 3} {'type': 'loss', 'content': 0.06890053302049637, 'timestamp': '2025-10-01 04:38:35.968411', 'step': 20609, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:36.008997', 'step': 20609, 'epoch': 3} {'type': 'loss', 'content': 0.1079823300242424, 'timestamp': '2025-10-01 04:38:36.011696', 'step': 20610, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:38:36.064411', 'step': 20610, 'epoch': 3} {'type': 'loss', 'content': 0.06260828673839569, 'timestamp': '2025-10-01 04:38:36.066753', 'step': 20611, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:38:36.097666', 'step': 20611, 'epoch': 3} {'type': 'loss', 'content': 0.043716900050640106, 'timestamp': '2025-10-01 04:38:36.121194', 'step': 20612, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:38:36.152497', 'step': 20612, 'epoch': 3} {'type': 'loss', 'content': 0.03374451398849487, 'timestamp': '2025-10-01 04:38:36.154524', 'step': 20613, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:36.185448', 'step': 20613, 'epoch': 3} {'type': 'loss', 'content': 0.07558669149875641, 'timestamp': '2025-10-01 04:38:36.187466', 'step': 20614, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:38:36.219993', 'step': 20614, 'epoch': 3} {'type': 'loss', 'content': 0.11887193471193314, 'timestamp': '2025-10-01 04:38:36.222426', 'step': 20615, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:36.253712', 'step': 20615, 'epoch': 3} {'type': 'loss', 'content': 0.08382394909858704, 'timestamp': '2025-10-01 04:38:36.278056', 'step': 20616, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:36.315573', 'step': 20616, 'epoch': 3} {'type': 'loss', 'content': 0.03412659838795662, 'timestamp': '2025-10-01 04:38:36.317672', 'step': 20617, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:36.349587', 'step': 20617, 'epoch': 3} {'type': 'loss', 'content': 0.07461149990558624, 'timestamp': '2025-10-01 04:38:36.351569', 'step': 20618, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:36.382970', 'step': 20618, 'epoch': 3} {'type': 'loss', 'content': 0.07555770128965378, 'timestamp': '2025-10-01 04:38:36.385057', 'step': 20619, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:36.416247', 'step': 20619, 'epoch': 3} {'type': 'loss', 'content': 0.02121003158390522, 'timestamp': '2025-10-01 04:38:36.439913', 'step': 20620, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:36.470780', 'step': 20620, 'epoch': 3} {'type': 'loss', 'content': 0.09966310113668442, 'timestamp': '2025-10-01 04:38:36.473369', 'step': 20621, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:36.503936', 'step': 20621, 'epoch': 3} {'type': 'loss', 'content': 0.025137538090348244, 'timestamp': '2025-10-01 04:38:36.505919', 'step': 20622, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:36.539661', 'step': 20622, 'epoch': 3} {'type': 'loss', 'content': 0.07632023096084595, 'timestamp': '2025-10-01 04:38:36.541687', 'step': 20623, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:36.572395', 'step': 20623, 'epoch': 3} {'type': 'loss', 'content': 0.032128602266311646, 'timestamp': '2025-10-01 04:38:36.595895', 'step': 20624, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:36.627461', 'step': 20624, 'epoch': 3} {'type': 'loss', 'content': 0.06938832998275757, 'timestamp': '2025-10-01 04:38:36.629636', 'step': 20625, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:36.662325', 'step': 20625, 'epoch': 3} {'type': 'loss', 'content': 0.05454002693295479, 'timestamp': '2025-10-01 04:38:36.664631', 'step': 20626, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:38:36.696217', 'step': 20626, 'epoch': 3} {'type': 'loss', 'content': 0.0765010192990303, 'timestamp': '2025-10-01 04:38:36.699045', 'step': 20627, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:38:36.730671', 'step': 20627, 'epoch': 3} {'type': 'loss', 'content': 0.04003104940056801, 'timestamp': '2025-10-01 04:38:36.754286', 'step': 20628, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:36.784970', 'step': 20628, 'epoch': 3} {'type': 'loss', 'content': 0.05756758525967598, 'timestamp': '2025-10-01 04:38:36.787038', 'step': 20629, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:36.817703', 'step': 20629, 'epoch': 3} {'type': 'loss', 'content': 0.07440412044525146, 'timestamp': '2025-10-01 04:38:36.819809', 'step': 20630, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:36.849884', 'step': 20630, 'epoch': 3} {'type': 'loss', 'content': 0.08862952888011932, 'timestamp': '2025-10-01 04:38:36.851919', 'step': 20631, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:36.882445', 'step': 20631, 'epoch': 3} {'type': 'loss', 'content': 0.025927908718585968, 'timestamp': '2025-10-01 04:38:36.905845', 'step': 20632, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:36.938240', 'step': 20632, 'epoch': 3} {'type': 'loss', 'content': 0.04248575493693352, 'timestamp': '2025-10-01 04:38:36.940328', 'step': 20633, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:36.971533', 'step': 20633, 'epoch': 3} {'type': 'loss', 'content': 0.061435867100954056, 'timestamp': '2025-10-01 04:38:36.973698', 'step': 20634, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:38:37.009863', 'step': 20634, 'epoch': 3} {'type': 'loss', 'content': 0.04199916124343872, 'timestamp': '2025-10-01 04:38:37.013241', 'step': 20635, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:38:37.044205', 'step': 20635, 'epoch': 3} {'type': 'loss', 'content': 0.042236875742673874, 'timestamp': '2025-10-01 04:38:37.067776', 'step': 20636, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:37.099065', 'step': 20636, 'epoch': 3} {'type': 'loss', 'content': 0.1104205921292305, 'timestamp': '2025-10-01 04:38:37.101257', 'step': 20637, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:38:37.131840', 'step': 20637, 'epoch': 3} {'type': 'loss', 'content': 0.012599574401974678, 'timestamp': '2025-10-01 04:38:37.134515', 'step': 20638, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:37.165292', 'step': 20638, 'epoch': 3} {'type': 'loss', 'content': 0.08711916953325272, 'timestamp': '2025-10-01 04:38:37.167603', 'step': 20639, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:37.200371', 'step': 20639, 'epoch': 3} {'type': 'loss', 'content': 0.07602869719266891, 'timestamp': '2025-10-01 04:38:37.223870', 'step': 20640, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:38:37.256830', 'step': 20640, 'epoch': 3} {'type': 'loss', 'content': 0.018009018152952194, 'timestamp': '2025-10-01 04:38:37.258898', 'step': 20641, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:37.293465', 'step': 20641, 'epoch': 3} {'type': 'loss', 'content': 0.11142414808273315, 'timestamp': '2025-10-01 04:38:37.295596', 'step': 20642, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:37.333685', 'step': 20642, 'epoch': 3} {'type': 'loss', 'content': 0.09699244052171707, 'timestamp': '2025-10-01 04:38:37.335714', 'step': 20643, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:37.368151', 'step': 20643, 'epoch': 3} {'type': 'loss', 'content': 0.005048682447522879, 'timestamp': '2025-10-01 04:38:37.391663', 'step': 20644, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:37.426440', 'step': 20644, 'epoch': 3} {'type': 'loss', 'content': 0.036609746515750885, 'timestamp': '2025-10-01 04:38:37.428571', 'step': 20645, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:38:37.466835', 'step': 20645, 'epoch': 3} {'type': 'loss', 'content': 0.10004651546478271, 'timestamp': '2025-10-01 04:38:37.469322', 'step': 20646, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:38:37.500799', 'step': 20646, 'epoch': 3} {'type': 'loss', 'content': 0.04994581639766693, 'timestamp': '2025-10-01 04:38:37.503277', 'step': 20647, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:37.545245', 'step': 20647, 'epoch': 3} {'type': 'loss', 'content': 0.027930352836847305, 'timestamp': '2025-10-01 04:38:37.568802', 'step': 20648, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:37.610796', 'step': 20648, 'epoch': 3} {'type': 'loss', 'content': 0.0513630285859108, 'timestamp': '2025-10-01 04:38:37.612899', 'step': 20649, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:37.648089', 'step': 20649, 'epoch': 3} {'type': 'loss', 'content': 0.09148508310317993, 'timestamp': '2025-10-01 04:38:37.650258', 'step': 20650, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:38:37.681803', 'step': 20650, 'epoch': 3} {'type': 'loss', 'content': 0.02974032238125801, 'timestamp': '2025-10-01 04:38:37.684217', 'step': 20651, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:37.717478', 'step': 20651, 'epoch': 3} {'type': 'loss', 'content': 0.057070448994636536, 'timestamp': '2025-10-01 04:38:37.741472', 'step': 20652, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:37.772422', 'step': 20652, 'epoch': 3} {'type': 'loss', 'content': 0.06776256859302521, 'timestamp': '2025-10-01 04:38:37.774588', 'step': 20653, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:37.806016', 'step': 20653, 'epoch': 3} {'type': 'loss', 'content': 0.03784329444169998, 'timestamp': '2025-10-01 04:38:37.808184', 'step': 20654, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:37.847613', 'step': 20654, 'epoch': 3} {'type': 'loss', 'content': 0.04587974771857262, 'timestamp': '2025-10-01 04:38:37.849859', 'step': 20655, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:38:37.887418', 'step': 20655, 'epoch': 3} {'type': 'loss', 'content': 0.09339054673910141, 'timestamp': '2025-10-01 04:38:37.910944', 'step': 20656, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:38:37.946476', 'step': 20656, 'epoch': 3} {'type': 'loss', 'content': 0.04289769008755684, 'timestamp': '2025-10-01 04:38:37.948629', 'step': 20657, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:37.984771', 'step': 20657, 'epoch': 3} {'type': 'loss', 'content': 0.08244411647319794, 'timestamp': '2025-10-01 04:38:37.987205', 'step': 20658, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:38.023259', 'step': 20658, 'epoch': 3} {'type': 'loss', 'content': 0.06450992822647095, 'timestamp': '2025-10-01 04:38:38.025428', 'step': 20659, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:38.060826', 'step': 20659, 'epoch': 3} {'type': 'loss', 'content': 0.08342675119638443, 'timestamp': '2025-10-01 04:38:38.086040', 'step': 20660, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:38.122107', 'step': 20660, 'epoch': 3} {'type': 'loss', 'content': 0.06347522139549255, 'timestamp': '2025-10-01 04:38:38.124321', 'step': 20661, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:38:38.165963', 'step': 20661, 'epoch': 3} {'type': 'loss', 'content': 0.04140122979879379, 'timestamp': '2025-10-01 04:38:38.168832', 'step': 20662, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:38.202502', 'step': 20662, 'epoch': 3} {'type': 'loss', 'content': 0.017739184200763702, 'timestamp': '2025-10-01 04:38:38.204582', 'step': 20663, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:38.236625', 'step': 20663, 'epoch': 3} {'type': 'loss', 'content': 0.04738389328122139, 'timestamp': '2025-10-01 04:38:38.260234', 'step': 20664, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:38.291791', 'step': 20664, 'epoch': 3} {'type': 'loss', 'content': 0.09914132952690125, 'timestamp': '2025-10-01 04:38:38.293820', 'step': 20665, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:38.334554', 'step': 20665, 'epoch': 3} {'type': 'loss', 'content': 0.09226572513580322, 'timestamp': '2025-10-01 04:38:38.336535', 'step': 20666, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:38.371460', 'step': 20666, 'epoch': 3} {'type': 'loss', 'content': 0.0315314456820488, 'timestamp': '2025-10-01 04:38:38.373517', 'step': 20667, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:38.417672', 'step': 20667, 'epoch': 3} {'type': 'loss', 'content': 0.04700814560055733, 'timestamp': '2025-10-01 04:38:38.441496', 'step': 20668, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:38.475297', 'step': 20668, 'epoch': 3} {'type': 'loss', 'content': 0.0986916571855545, 'timestamp': '2025-10-01 04:38:38.477536', 'step': 20669, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:38.515066', 'step': 20669, 'epoch': 3} {'type': 'loss', 'content': 0.08434953540563583, 'timestamp': '2025-10-01 04:38:38.517521', 'step': 20670, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:38.547935', 'step': 20670, 'epoch': 3} {'type': 'loss', 'content': 0.10945460945367813, 'timestamp': '2025-10-01 04:38:38.550023', 'step': 20671, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:38.582389', 'step': 20671, 'epoch': 3} {'type': 'loss', 'content': 0.032714564353227615, 'timestamp': '2025-10-01 04:38:38.610708', 'step': 20672, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:38:38.645718', 'step': 20672, 'epoch': 3} {'type': 'loss', 'content': 0.030508778989315033, 'timestamp': '2025-10-01 04:38:38.647658', 'step': 20673, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:38.683030', 'step': 20673, 'epoch': 3} {'type': 'loss', 'content': 0.043914418667554855, 'timestamp': '2025-10-01 04:38:38.685140', 'step': 20674, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:38.716859', 'step': 20674, 'epoch': 3} {'type': 'loss', 'content': 0.060173213481903076, 'timestamp': '2025-10-01 04:38:38.719012', 'step': 20675, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:38.749697', 'step': 20675, 'epoch': 3} {'type': 'loss', 'content': 0.02405267208814621, 'timestamp': '2025-10-01 04:38:38.773637', 'step': 20676, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:38.814306', 'step': 20676, 'epoch': 3} {'type': 'loss', 'content': 0.09883945435285568, 'timestamp': '2025-10-01 04:38:38.816391', 'step': 20677, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:38:38.847910', 'step': 20677, 'epoch': 3} {'type': 'loss', 'content': 0.12855567038059235, 'timestamp': '2025-10-01 04:38:38.865378', 'step': 20678, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:38.900793', 'step': 20678, 'epoch': 3} {'type': 'loss', 'content': 0.06096148118376732, 'timestamp': '2025-10-01 04:38:38.902916', 'step': 20679, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:38.935925', 'step': 20679, 'epoch': 3} {'type': 'loss', 'content': 0.052812859416007996, 'timestamp': '2025-10-01 04:38:38.959495', 'step': 20680, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:38.996201', 'step': 20680, 'epoch': 3} {'type': 'loss', 'content': 0.058474309742450714, 'timestamp': '2025-10-01 04:38:38.998147', 'step': 20681, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:39.030462', 'step': 20681, 'epoch': 3} {'type': 'loss', 'content': 0.053363218903541565, 'timestamp': '2025-10-01 04:38:39.032731', 'step': 20682, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:39.074174', 'step': 20682, 'epoch': 3} {'type': 'loss', 'content': 0.012705232948064804, 'timestamp': '2025-10-01 04:38:39.077974', 'step': 20683, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:39.116892', 'step': 20683, 'epoch': 3} {'type': 'loss', 'content': 0.05775614455342293, 'timestamp': '2025-10-01 04:38:39.140842', 'step': 20684, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:39.172715', 'step': 20684, 'epoch': 3} {'type': 'loss', 'content': 0.09203492105007172, 'timestamp': '2025-10-01 04:38:39.175461', 'step': 20685, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:38:39.212658', 'step': 20685, 'epoch': 3} {'type': 'loss', 'content': 0.11496470868587494, 'timestamp': '2025-10-01 04:38:39.215518', 'step': 20686, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:38:39.255662', 'step': 20686, 'epoch': 3} {'type': 'loss', 'content': 0.1276831328868866, 'timestamp': '2025-10-01 04:38:39.258105', 'step': 20687, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:39.294469', 'step': 20687, 'epoch': 3} {'type': 'loss', 'content': 0.09457633644342422, 'timestamp': '2025-10-01 04:38:39.318146', 'step': 20688, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:39.350672', 'step': 20688, 'epoch': 3} {'type': 'loss', 'content': 0.047899045050144196, 'timestamp': '2025-10-01 04:38:39.353065', 'step': 20689, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:38:39.385546', 'step': 20689, 'epoch': 3} {'type': 'loss', 'content': 0.023716174066066742, 'timestamp': '2025-10-01 04:38:39.387958', 'step': 20690, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:39.420204', 'step': 20690, 'epoch': 3} {'type': 'loss', 'content': 0.10648135840892792, 'timestamp': '2025-10-01 04:38:39.422756', 'step': 20691, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:39.455717', 'step': 20691, 'epoch': 3} {'type': 'loss', 'content': 0.13887792825698853, 'timestamp': '2025-10-01 04:38:39.479637', 'step': 20692, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:39.510822', 'step': 20692, 'epoch': 3} {'type': 'loss', 'content': 0.042129937559366226, 'timestamp': '2025-10-01 04:38:39.513286', 'step': 20693, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:39.551401', 'step': 20693, 'epoch': 3} {'type': 'loss', 'content': 0.008641724474728107, 'timestamp': '2025-10-01 04:38:39.553652', 'step': 20694, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:39.585167', 'step': 20694, 'epoch': 3} {'type': 'loss', 'content': 0.07110496610403061, 'timestamp': '2025-10-01 04:38:39.587577', 'step': 20695, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:39.623573', 'step': 20695, 'epoch': 3} {'type': 'loss', 'content': 0.0495261549949646, 'timestamp': '2025-10-01 04:38:39.647107', 'step': 20696, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:39.688932', 'step': 20696, 'epoch': 3} {'type': 'loss', 'content': 0.07193025946617126, 'timestamp': '2025-10-01 04:38:39.691272', 'step': 20697, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:39.724881', 'step': 20697, 'epoch': 3} {'type': 'loss', 'content': 0.02995855174958706, 'timestamp': '2025-10-01 04:38:39.737390', 'step': 20698, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:39.778358', 'step': 20698, 'epoch': 3} {'type': 'loss', 'content': 0.07126251608133316, 'timestamp': '2025-10-01 04:38:39.780522', 'step': 20699, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:38:39.811422', 'step': 20699, 'epoch': 3} {'type': 'loss', 'content': 0.029022470116615295, 'timestamp': '2025-10-01 04:38:39.835657', 'step': 20700, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:39.869616', 'step': 20700, 'epoch': 3} {'type': 'loss', 'content': 0.08756748586893082, 'timestamp': '2025-10-01 04:38:39.872092', 'step': 20701, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:39.906914', 'step': 20701, 'epoch': 3} {'type': 'loss', 'content': 0.01791386306285858, 'timestamp': '2025-10-01 04:38:39.908983', 'step': 20702, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:39.943261', 'step': 20702, 'epoch': 3} {'type': 'loss', 'content': 0.08673913031816483, 'timestamp': '2025-10-01 04:38:39.948070', 'step': 20703, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:39.978833', 'step': 20703, 'epoch': 3} {'type': 'loss', 'content': 0.019033124670386314, 'timestamp': '2025-10-01 04:38:40.002413', 'step': 20704, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:40.033839', 'step': 20704, 'epoch': 3} {'type': 'loss', 'content': 0.11788246780633926, 'timestamp': '2025-10-01 04:38:40.040528', 'step': 20705, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:40.075470', 'step': 20705, 'epoch': 3} {'type': 'loss', 'content': 0.06281479448080063, 'timestamp': '2025-10-01 04:38:40.077743', 'step': 20706, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:40.113446', 'step': 20706, 'epoch': 3} {'type': 'loss', 'content': 0.058143824338912964, 'timestamp': '2025-10-01 04:38:40.119657', 'step': 20707, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:40.153973', 'step': 20707, 'epoch': 3} {'type': 'loss', 'content': 0.021158495917916298, 'timestamp': '2025-10-01 04:38:40.178335', 'step': 20708, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:40.210809', 'step': 20708, 'epoch': 3} {'type': 'loss', 'content': 0.03467803820967674, 'timestamp': '2025-10-01 04:38:40.212886', 'step': 20709, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:40.245092', 'step': 20709, 'epoch': 3} {'type': 'loss', 'content': 0.0930263102054596, 'timestamp': '2025-10-01 04:38:40.247428', 'step': 20710, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:40.278598', 'step': 20710, 'epoch': 3} {'type': 'loss', 'content': 0.05160599574446678, 'timestamp': '2025-10-01 04:38:40.282137', 'step': 20711, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:40.316364', 'step': 20711, 'epoch': 3} {'type': 'loss', 'content': 0.04511800408363342, 'timestamp': '2025-10-01 04:38:40.340672', 'step': 20712, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:40.371811', 'step': 20712, 'epoch': 3} {'type': 'loss', 'content': 0.031521040946245193, 'timestamp': '2025-10-01 04:38:40.374264', 'step': 20713, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:40.406378', 'step': 20713, 'epoch': 3} {'type': 'loss', 'content': 0.0414259247481823, 'timestamp': '2025-10-01 04:38:40.408956', 'step': 20714, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:40.438804', 'step': 20714, 'epoch': 3} {'type': 'loss', 'content': 0.03985315188765526, 'timestamp': '2025-10-01 04:38:40.441184', 'step': 20715, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:40.471736', 'step': 20715, 'epoch': 3} {'type': 'loss', 'content': 0.06851433217525482, 'timestamp': '2025-10-01 04:38:40.501037', 'step': 20716, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:40.540941', 'step': 20716, 'epoch': 3} {'type': 'loss', 'content': 0.04316898062825203, 'timestamp': '2025-10-01 04:38:40.543379', 'step': 20717, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:40.574350', 'step': 20717, 'epoch': 3} {'type': 'loss', 'content': 0.005972941406071186, 'timestamp': '2025-10-01 04:38:40.576878', 'step': 20718, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:40.611599', 'step': 20718, 'epoch': 3} {'type': 'loss', 'content': 0.028463182970881462, 'timestamp': '2025-10-01 04:38:40.614031', 'step': 20719, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:38:40.648299', 'step': 20719, 'epoch': 3} {'type': 'loss', 'content': 0.026251446455717087, 'timestamp': '2025-10-01 04:38:40.672336', 'step': 20720, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:40.704051', 'step': 20720, 'epoch': 3} {'type': 'loss', 'content': 0.10743822157382965, 'timestamp': '2025-10-01 04:38:40.706577', 'step': 20721, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:38:40.742517', 'step': 20721, 'epoch': 3} {'type': 'loss', 'content': 0.09596927464008331, 'timestamp': '2025-10-01 04:38:40.745174', 'step': 20722, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:40.781816', 'step': 20722, 'epoch': 3} {'type': 'loss', 'content': 0.0009747442090883851, 'timestamp': '2025-10-01 04:38:40.783953', 'step': 20723, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:40.816322', 'step': 20723, 'epoch': 3} {'type': 'loss', 'content': 0.04970842972397804, 'timestamp': '2025-10-01 04:38:40.839947', 'step': 20724, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:40.883481', 'step': 20724, 'epoch': 3} {'type': 'loss', 'content': 0.04890856519341469, 'timestamp': '2025-10-01 04:38:40.885822', 'step': 20725, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:38:40.926896', 'step': 20725, 'epoch': 3} {'type': 'loss', 'content': 0.06957755237817764, 'timestamp': '2025-10-01 04:38:40.929437', 'step': 20726, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:40.974363', 'step': 20726, 'epoch': 3} {'type': 'loss', 'content': 0.048826828598976135, 'timestamp': '2025-10-01 04:38:40.976633', 'step': 20727, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:41.008131', 'step': 20727, 'epoch': 3} {'type': 'loss', 'content': 0.09067676216363907, 'timestamp': '2025-10-01 04:38:41.031657', 'step': 20728, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:38:41.065185', 'step': 20728, 'epoch': 3} {'type': 'loss', 'content': 0.051453784108161926, 'timestamp': '2025-10-01 04:38:41.067174', 'step': 20729, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:41.099502', 'step': 20729, 'epoch': 3} {'type': 'loss', 'content': 0.021912219002842903, 'timestamp': '2025-10-01 04:38:41.101745', 'step': 20730, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:41.137000', 'step': 20730, 'epoch': 3} {'type': 'loss', 'content': 0.04506411403417587, 'timestamp': '2025-10-01 04:38:41.139067', 'step': 20731, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:38:41.174221', 'step': 20731, 'epoch': 3} {'type': 'loss', 'content': 0.04173198342323303, 'timestamp': '2025-10-01 04:38:41.197882', 'step': 20732, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:41.230710', 'step': 20732, 'epoch': 3} {'type': 'loss', 'content': 0.03715354949235916, 'timestamp': '2025-10-01 04:38:41.232689', 'step': 20733, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:41.271115', 'step': 20733, 'epoch': 3} {'type': 'loss', 'content': 0.07869840413331985, 'timestamp': '2025-10-01 04:38:41.273050', 'step': 20734, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:41.305078', 'step': 20734, 'epoch': 3} {'type': 'loss', 'content': 0.07760416716337204, 'timestamp': '2025-10-01 04:38:41.307157', 'step': 20735, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:41.343700', 'step': 20735, 'epoch': 3} {'type': 'loss', 'content': 0.017260326072573662, 'timestamp': '2025-10-01 04:38:41.367251', 'step': 20736, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:41.398037', 'step': 20736, 'epoch': 3} {'type': 'loss', 'content': 0.046881888061761856, 'timestamp': '2025-10-01 04:38:41.400057', 'step': 20737, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:41.431307', 'step': 20737, 'epoch': 3} {'type': 'loss', 'content': 0.08232090622186661, 'timestamp': '2025-10-01 04:38:41.433289', 'step': 20738, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:41.467950', 'step': 20738, 'epoch': 3} {'type': 'loss', 'content': 0.007720306981354952, 'timestamp': '2025-10-01 04:38:41.476056', 'step': 20739, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:41.509267', 'step': 20739, 'epoch': 3} {'type': 'loss', 'content': 0.13044407963752747, 'timestamp': '2025-10-01 04:38:41.532897', 'step': 20740, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:41.572689', 'step': 20740, 'epoch': 3} {'type': 'loss', 'content': 0.055767972022295, 'timestamp': '2025-10-01 04:38:41.574840', 'step': 20741, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:41.606344', 'step': 20741, 'epoch': 3} {'type': 'loss', 'content': 0.09134911745786667, 'timestamp': '2025-10-01 04:38:41.608811', 'step': 20742, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:41.648736', 'step': 20742, 'epoch': 3} {'type': 'loss', 'content': 0.06524842977523804, 'timestamp': '2025-10-01 04:38:41.651561', 'step': 20743, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:41.684369', 'step': 20743, 'epoch': 3} {'type': 'loss', 'content': 0.06919816136360168, 'timestamp': '2025-10-01 04:38:41.708075', 'step': 20744, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:41.741142', 'step': 20744, 'epoch': 3} {'type': 'loss', 'content': 0.053238820284605026, 'timestamp': '2025-10-01 04:38:41.742980', 'step': 20745, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:38:41.776546', 'step': 20745, 'epoch': 3} {'type': 'loss', 'content': 0.14421674609184265, 'timestamp': '2025-10-01 04:38:41.779125', 'step': 20746, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:41.813071', 'step': 20746, 'epoch': 3} {'type': 'loss', 'content': 0.03907078504562378, 'timestamp': '2025-10-01 04:38:41.815324', 'step': 20747, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:41.848438', 'step': 20747, 'epoch': 3} {'type': 'loss', 'content': 0.029589306563138962, 'timestamp': '2025-10-01 04:38:41.872373', 'step': 20748, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:41.905675', 'step': 20748, 'epoch': 3} {'type': 'loss', 'content': 0.02720683254301548, 'timestamp': '2025-10-01 04:38:41.907750', 'step': 20749, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:41.940255', 'step': 20749, 'epoch': 3} {'type': 'loss', 'content': 0.0660829171538353, 'timestamp': '2025-10-01 04:38:41.942338', 'step': 20750, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:41.974861', 'step': 20750, 'epoch': 3} {'type': 'loss', 'content': 0.09017294645309448, 'timestamp': '2025-10-01 04:38:41.976884', 'step': 20751, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:42.007635', 'step': 20751, 'epoch': 3} {'type': 'loss', 'content': 0.028181657195091248, 'timestamp': '2025-10-01 04:38:42.031166', 'step': 20752, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-10-01 04:38:42.061801', 'step': 20752, 'epoch': 3} {'type': 'loss', 'content': 0.08853573352098465, 'timestamp': '2025-10-01 04:38:42.064038', 'step': 20753, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:42.097302', 'step': 20753, 'epoch': 3} {'type': 'loss', 'content': 0.07568657398223877, 'timestamp': '2025-10-01 04:38:42.100915', 'step': 20754, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:42.132552', 'step': 20754, 'epoch': 3} {'type': 'loss', 'content': 0.028848832473158836, 'timestamp': '2025-10-01 04:38:42.134794', 'step': 20755, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:42.168064', 'step': 20755, 'epoch': 3} {'type': 'loss', 'content': 0.07425631582736969, 'timestamp': '2025-10-01 04:38:42.192622', 'step': 20756, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:42.230378', 'step': 20756, 'epoch': 3} {'type': 'loss', 'content': 0.06343281269073486, 'timestamp': '2025-10-01 04:38:42.232402', 'step': 20757, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:42.263124', 'step': 20757, 'epoch': 3} {'type': 'loss', 'content': 0.016802458092570305, 'timestamp': '2025-10-01 04:38:42.265596', 'step': 20758, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:42.295960', 'step': 20758, 'epoch': 3} {'type': 'loss', 'content': 0.010793711058795452, 'timestamp': '2025-10-01 04:38:42.298383', 'step': 20759, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:38:42.333536', 'step': 20759, 'epoch': 3} {'type': 'loss', 'content': 0.057307906448841095, 'timestamp': '2025-10-01 04:38:42.357351', 'step': 20760, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:38:42.388398', 'step': 20760, 'epoch': 3} {'type': 'loss', 'content': 0.03149634227156639, 'timestamp': '2025-10-01 04:38:42.390505', 'step': 20761, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:42.424155', 'step': 20761, 'epoch': 3} {'type': 'loss', 'content': 0.07623875141143799, 'timestamp': '2025-10-01 04:38:42.426283', 'step': 20762, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:42.458198', 'step': 20762, 'epoch': 3} {'type': 'loss', 'content': 0.04389053210616112, 'timestamp': '2025-10-01 04:38:42.460385', 'step': 20763, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:42.501050', 'step': 20763, 'epoch': 3} {'type': 'loss', 'content': 0.056292515248060226, 'timestamp': '2025-10-01 04:38:42.524446', 'step': 20764, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:38:42.557038', 'step': 20764, 'epoch': 3} {'type': 'loss', 'content': 0.08236042410135269, 'timestamp': '2025-10-01 04:38:42.558996', 'step': 20765, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:38:42.589116', 'step': 20765, 'epoch': 3} {'type': 'loss', 'content': 0.05658654123544693, 'timestamp': '2025-10-01 04:38:42.591079', 'step': 20766, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:38:42.625476', 'step': 20766, 'epoch': 3} {'type': 'loss', 'content': 0.1049303412437439, 'timestamp': '2025-10-01 04:38:42.627847', 'step': 20767, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:42.658968', 'step': 20767, 'epoch': 3} {'type': 'loss', 'content': 0.041517794132232666, 'timestamp': '2025-10-01 04:38:42.682418', 'step': 20768, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:42.715685', 'step': 20768, 'epoch': 3} {'type': 'loss', 'content': 0.04940659552812576, 'timestamp': '2025-10-01 04:38:42.718422', 'step': 20769, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:42.750036', 'step': 20769, 'epoch': 3} {'type': 'loss', 'content': 0.08623966574668884, 'timestamp': '2025-10-01 04:38:42.752133', 'step': 20770, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:38:42.782902', 'step': 20770, 'epoch': 3} {'type': 'loss', 'content': 0.025491181761026382, 'timestamp': '2025-10-01 04:38:42.785228', 'step': 20771, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:42.819467', 'step': 20771, 'epoch': 3} {'type': 'loss', 'content': 0.032924968749284744, 'timestamp': '2025-10-01 04:38:42.843049', 'step': 20772, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:42.873211', 'step': 20772, 'epoch': 3} {'type': 'loss', 'content': 0.07062797993421555, 'timestamp': '2025-10-01 04:38:42.875772', 'step': 20773, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:42.906625', 'step': 20773, 'epoch': 3} {'type': 'loss', 'content': 0.09439873695373535, 'timestamp': '2025-10-01 04:38:42.908840', 'step': 20774, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:42.939569', 'step': 20774, 'epoch': 3} {'type': 'loss', 'content': 0.08348232507705688, 'timestamp': '2025-10-01 04:38:42.941540', 'step': 20775, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:38:42.972235', 'step': 20775, 'epoch': 3} {'type': 'loss', 'content': 0.02613174356520176, 'timestamp': '2025-10-01 04:38:42.995917', 'step': 20776, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:43.027679', 'step': 20776, 'epoch': 3} {'type': 'loss', 'content': 0.059251636266708374, 'timestamp': '2025-10-01 04:38:43.029660', 'step': 20777, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:43.060333', 'step': 20777, 'epoch': 3} {'type': 'loss', 'content': 0.0654730573296547, 'timestamp': '2025-10-01 04:38:43.062310', 'step': 20778, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:43.092862', 'step': 20778, 'epoch': 3} {'type': 'loss', 'content': 0.06480420380830765, 'timestamp': '2025-10-01 04:38:43.095093', 'step': 20779, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:38:43.126949', 'step': 20779, 'epoch': 3} {'type': 'loss', 'content': 0.09595008939504623, 'timestamp': '2025-10-01 04:38:43.155480', 'step': 20780, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:43.185832', 'step': 20780, 'epoch': 3} {'type': 'loss', 'content': 0.07648027688264847, 'timestamp': '2025-10-01 04:38:43.187807', 'step': 20781, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:43.219141', 'step': 20781, 'epoch': 3} {'type': 'loss', 'content': 0.07125556468963623, 'timestamp': '2025-10-01 04:38:43.221302', 'step': 20782, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:38:43.261325', 'step': 20782, 'epoch': 3} {'type': 'loss', 'content': 0.04618770256638527, 'timestamp': '2025-10-01 04:38:43.263695', 'step': 20783, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:43.295792', 'step': 20783, 'epoch': 3} {'type': 'loss', 'content': 0.027512069791555405, 'timestamp': '2025-10-01 04:38:43.319402', 'step': 20784, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:43.351561', 'step': 20784, 'epoch': 3} {'type': 'loss', 'content': 0.06322726607322693, 'timestamp': '2025-10-01 04:38:43.353570', 'step': 20785, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:43.383972', 'step': 20785, 'epoch': 3} {'type': 'loss', 'content': 0.04530493542551994, 'timestamp': '2025-10-01 04:38:43.386069', 'step': 20786, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:43.417012', 'step': 20786, 'epoch': 3} {'type': 'loss', 'content': 0.06007687747478485, 'timestamp': '2025-10-01 04:38:43.419232', 'step': 20787, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:43.449981', 'step': 20787, 'epoch': 3} {'type': 'loss', 'content': 0.10412470251321793, 'timestamp': '2025-10-01 04:38:43.473429', 'step': 20788, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:43.504197', 'step': 20788, 'epoch': 3} {'type': 'loss', 'content': 0.06303904950618744, 'timestamp': '2025-10-01 04:38:43.506227', 'step': 20789, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:38:43.537570', 'step': 20789, 'epoch': 3} {'type': 'loss', 'content': 0.12484396249055862, 'timestamp': '2025-10-01 04:38:43.539967', 'step': 20790, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:43.573785', 'step': 20790, 'epoch': 3} {'type': 'loss', 'content': 0.04326234757900238, 'timestamp': '2025-10-01 04:38:43.575954', 'step': 20791, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:43.606618', 'step': 20791, 'epoch': 3} {'type': 'loss', 'content': 0.07168974727392197, 'timestamp': '2025-10-01 04:38:43.630431', 'step': 20792, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:43.662087', 'step': 20792, 'epoch': 3} {'type': 'loss', 'content': 0.07416252791881561, 'timestamp': '2025-10-01 04:38:43.673445', 'step': 20793, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:43.704352', 'step': 20793, 'epoch': 3} {'type': 'loss', 'content': 0.022170398384332657, 'timestamp': '2025-10-01 04:38:43.706491', 'step': 20794, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:43.736626', 'step': 20794, 'epoch': 3} {'type': 'loss', 'content': 0.05837317556142807, 'timestamp': '2025-10-01 04:38:43.739133', 'step': 20795, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:43.770825', 'step': 20795, 'epoch': 3} {'type': 'loss', 'content': 0.03963678702712059, 'timestamp': '2025-10-01 04:38:43.794335', 'step': 20796, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:43.825245', 'step': 20796, 'epoch': 3} {'type': 'loss', 'content': 0.07505971938371658, 'timestamp': '2025-10-01 04:38:43.827147', 'step': 20797, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:43.857981', 'step': 20797, 'epoch': 3} {'type': 'loss', 'content': 0.037237416952848434, 'timestamp': '2025-10-01 04:38:43.860091', 'step': 20798, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:43.890869', 'step': 20798, 'epoch': 3} {'type': 'loss', 'content': 0.06401384621858597, 'timestamp': '2025-10-01 04:38:43.893120', 'step': 20799, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:38:43.923041', 'step': 20799, 'epoch': 3} {'type': 'loss', 'content': 0.08131258189678192, 'timestamp': '2025-10-01 04:38:43.946515', 'step': 20800, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:43.978136', 'step': 20800, 'epoch': 3} {'type': 'loss', 'content': 0.1296273022890091, 'timestamp': '2025-10-01 04:38:43.980367', 'step': 20801, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:44.010401', 'step': 20801, 'epoch': 3} {'type': 'loss', 'content': 0.049089331179857254, 'timestamp': '2025-10-01 04:38:44.012413', 'step': 20802, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:38:44.044773', 'step': 20802, 'epoch': 3} {'type': 'loss', 'content': 0.057576872408390045, 'timestamp': '2025-10-01 04:38:44.046982', 'step': 20803, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:44.078010', 'step': 20803, 'epoch': 3} {'type': 'loss', 'content': 0.03566436097025871, 'timestamp': '2025-10-01 04:38:44.102366', 'step': 20804, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:44.133300', 'step': 20804, 'epoch': 3} {'type': 'loss', 'content': 0.028858190402388573, 'timestamp': '2025-10-01 04:38:44.135632', 'step': 20805, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:44.166200', 'step': 20805, 'epoch': 3} {'type': 'loss', 'content': 0.15675510466098785, 'timestamp': '2025-10-01 04:38:44.168377', 'step': 20806, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:44.198863', 'step': 20806, 'epoch': 3} {'type': 'loss', 'content': 0.04865383356809616, 'timestamp': '2025-10-01 04:38:44.200948', 'step': 20807, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:44.240066', 'step': 20807, 'epoch': 3} {'type': 'loss', 'content': 0.05071334168314934, 'timestamp': '2025-10-01 04:38:44.263461', 'step': 20808, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:44.293592', 'step': 20808, 'epoch': 3} {'type': 'loss', 'content': 0.1344376504421234, 'timestamp': '2025-10-01 04:38:44.295757', 'step': 20809, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:44.326584', 'step': 20809, 'epoch': 3} {'type': 'loss', 'content': 0.015134076587855816, 'timestamp': '2025-10-01 04:38:44.328767', 'step': 20810, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:38:44.359228', 'step': 20810, 'epoch': 3} {'type': 'loss', 'content': 0.10267458856105804, 'timestamp': '2025-10-01 04:38:44.361530', 'step': 20811, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:44.392200', 'step': 20811, 'epoch': 3} {'type': 'loss', 'content': 0.043213941156864166, 'timestamp': '2025-10-01 04:38:44.415847', 'step': 20812, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:44.446270', 'step': 20812, 'epoch': 3} {'type': 'loss', 'content': 0.03712053596973419, 'timestamp': '2025-10-01 04:38:44.448624', 'step': 20813, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:44.479505', 'step': 20813, 'epoch': 3} {'type': 'loss', 'content': 0.056529369205236435, 'timestamp': '2025-10-01 04:38:44.481465', 'step': 20814, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:44.511668', 'step': 20814, 'epoch': 3} {'type': 'loss', 'content': 0.12192363291978836, 'timestamp': '2025-10-01 04:38:44.513692', 'step': 20815, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:38:44.551264', 'step': 20815, 'epoch': 3} {'type': 'loss', 'content': 0.07298051565885544, 'timestamp': '2025-10-01 04:38:44.574828', 'step': 20816, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:44.604885', 'step': 20816, 'epoch': 3} {'type': 'loss', 'content': 0.08303572982549667, 'timestamp': '2025-10-01 04:38:44.606868', 'step': 20817, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:44.638820', 'step': 20817, 'epoch': 3} {'type': 'loss', 'content': 0.060517363250255585, 'timestamp': '2025-10-01 04:38:44.642122', 'step': 20818, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:38:44.691252', 'step': 20818, 'epoch': 3} {'type': 'loss', 'content': 0.022852998226881027, 'timestamp': '2025-10-01 04:38:44.693318', 'step': 20819, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:44.735526', 'step': 20819, 'epoch': 3} {'type': 'loss', 'content': 0.07538414001464844, 'timestamp': '2025-10-01 04:38:44.759218', 'step': 20820, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:44.789128', 'step': 20820, 'epoch': 3} {'type': 'loss', 'content': 0.06217630207538605, 'timestamp': '2025-10-01 04:38:44.791224', 'step': 20821, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:44.821731', 'step': 20821, 'epoch': 3} {'type': 'loss', 'content': 0.018159104511141777, 'timestamp': '2025-10-01 04:38:44.823704', 'step': 20822, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:44.853533', 'step': 20822, 'epoch': 3} {'type': 'loss', 'content': 0.06423620134592056, 'timestamp': '2025-10-01 04:38:44.863219', 'step': 20823, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:38:44.893918', 'step': 20823, 'epoch': 3} {'type': 'loss', 'content': 0.1016375869512558, 'timestamp': '2025-10-01 04:38:44.917649', 'step': 20824, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:44.947575', 'step': 20824, 'epoch': 3} {'type': 'loss', 'content': 0.07925426214933395, 'timestamp': '2025-10-01 04:38:44.949758', 'step': 20825, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:44.979834', 'step': 20825, 'epoch': 3} {'type': 'loss', 'content': 0.10072921216487885, 'timestamp': '2025-10-01 04:38:44.981908', 'step': 20826, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:45.011902', 'step': 20826, 'epoch': 3} {'type': 'loss', 'content': 0.07758048921823502, 'timestamp': '2025-10-01 04:38:45.015296', 'step': 20827, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:45.045335', 'step': 20827, 'epoch': 3} {'type': 'loss', 'content': 0.0926458090543747, 'timestamp': '2025-10-01 04:38:45.070228', 'step': 20828, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:45.101412', 'step': 20828, 'epoch': 3} {'type': 'loss', 'content': 0.06659100949764252, 'timestamp': '2025-10-01 04:38:45.103531', 'step': 20829, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:45.134115', 'step': 20829, 'epoch': 3} {'type': 'loss', 'content': 0.07192783057689667, 'timestamp': '2025-10-01 04:38:45.137832', 'step': 20830, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:45.167782', 'step': 20830, 'epoch': 3} {'type': 'loss', 'content': 0.07522983103990555, 'timestamp': '2025-10-01 04:38:45.169917', 'step': 20831, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:45.209917', 'step': 20831, 'epoch': 3} {'type': 'loss', 'content': 0.06768758594989777, 'timestamp': '2025-10-01 04:38:45.233452', 'step': 20832, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:45.263948', 'step': 20832, 'epoch': 3} {'type': 'loss', 'content': 0.06696338206529617, 'timestamp': '2025-10-01 04:38:45.266060', 'step': 20833, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:45.296263', 'step': 20833, 'epoch': 3} {'type': 'loss', 'content': 0.1619952768087387, 'timestamp': '2025-10-01 04:38:45.298390', 'step': 20834, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:38:45.329447', 'step': 20834, 'epoch': 3} {'type': 'loss', 'content': 0.09808909893035889, 'timestamp': '2025-10-01 04:38:45.332015', 'step': 20835, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:45.361793', 'step': 20835, 'epoch': 3} {'type': 'loss', 'content': 0.08773981779813766, 'timestamp': '2025-10-01 04:38:45.385291', 'step': 20836, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:45.416090', 'step': 20836, 'epoch': 3} {'type': 'loss', 'content': 0.048612870275974274, 'timestamp': '2025-10-01 04:38:45.418145', 'step': 20837, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:45.448151', 'step': 20837, 'epoch': 3} {'type': 'loss', 'content': 0.04570116847753525, 'timestamp': '2025-10-01 04:38:45.450506', 'step': 20838, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:45.480604', 'step': 20838, 'epoch': 3} {'type': 'loss', 'content': 0.08135184645652771, 'timestamp': '2025-10-01 04:38:45.482759', 'step': 20839, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:45.512584', 'step': 20839, 'epoch': 3} {'type': 'loss', 'content': 0.07334686815738678, 'timestamp': '2025-10-01 04:38:45.536213', 'step': 20840, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:45.566248', 'step': 20840, 'epoch': 3} {'type': 'loss', 'content': 0.04309219866991043, 'timestamp': '2025-10-01 04:38:45.569000', 'step': 20841, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:38:45.599261', 'step': 20841, 'epoch': 3} {'type': 'loss', 'content': 0.09832067042589188, 'timestamp': '2025-10-01 04:38:45.601494', 'step': 20842, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:45.631635', 'step': 20842, 'epoch': 3} {'type': 'loss', 'content': 0.09819251298904419, 'timestamp': '2025-10-01 04:38:45.634592', 'step': 20843, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:45.664373', 'step': 20843, 'epoch': 3} {'type': 'loss', 'content': 0.06807585060596466, 'timestamp': '2025-10-01 04:38:45.695719', 'step': 20844, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:45.726377', 'step': 20844, 'epoch': 3} {'type': 'loss', 'content': 0.06085706129670143, 'timestamp': '2025-10-01 04:38:45.736811', 'step': 20845, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:45.766856', 'step': 20845, 'epoch': 3} {'type': 'loss', 'content': 0.07304184883832932, 'timestamp': '2025-10-01 04:38:45.769051', 'step': 20846, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:45.801460', 'step': 20846, 'epoch': 3} {'type': 'loss', 'content': 0.038908373564481735, 'timestamp': '2025-10-01 04:38:45.803400', 'step': 20847, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:45.848617', 'step': 20847, 'epoch': 3} {'type': 'loss', 'content': 0.04073862358927727, 'timestamp': '2025-10-01 04:38:45.872263', 'step': 20848, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:45.902645', 'step': 20848, 'epoch': 3} {'type': 'loss', 'content': 0.07869503647089005, 'timestamp': '2025-10-01 04:38:45.904749', 'step': 20849, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:45.935163', 'step': 20849, 'epoch': 3} {'type': 'loss', 'content': 0.03310442715883255, 'timestamp': '2025-10-01 04:38:45.938222', 'step': 20850, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:45.968328', 'step': 20850, 'epoch': 3} {'type': 'loss', 'content': 0.05486877262592316, 'timestamp': '2025-10-01 04:38:45.970466', 'step': 20851, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:46.010542', 'step': 20851, 'epoch': 3} {'type': 'loss', 'content': 0.08123435825109482, 'timestamp': '2025-10-01 04:38:46.042662', 'step': 20852, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:46.072624', 'step': 20852, 'epoch': 3} {'type': 'loss', 'content': 0.1440640538930893, 'timestamp': '2025-10-01 04:38:46.076001', 'step': 20853, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:46.107477', 'step': 20853, 'epoch': 3} {'type': 'loss', 'content': 0.08608008176088333, 'timestamp': '2025-10-01 04:38:46.110949', 'step': 20854, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:46.141801', 'step': 20854, 'epoch': 3} {'type': 'loss', 'content': 0.08192386478185654, 'timestamp': '2025-10-01 04:38:46.156482', 'step': 20855, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:46.186679', 'step': 20855, 'epoch': 3} {'type': 'loss', 'content': 0.08908361941576004, 'timestamp': '2025-10-01 04:38:46.210343', 'step': 20856, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:38:46.240555', 'step': 20856, 'epoch': 3} {'type': 'loss', 'content': 0.01592128910124302, 'timestamp': '2025-10-01 04:38:46.242748', 'step': 20857, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:46.272831', 'step': 20857, 'epoch': 3} {'type': 'loss', 'content': 0.06991507858037949, 'timestamp': '2025-10-01 04:38:46.274751', 'step': 20858, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:46.304408', 'step': 20858, 'epoch': 3} {'type': 'loss', 'content': 0.04897862672805786, 'timestamp': '2025-10-01 04:38:46.307687', 'step': 20859, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:46.338513', 'step': 20859, 'epoch': 3} {'type': 'loss', 'content': 0.07750072330236435, 'timestamp': '2025-10-01 04:38:46.362431', 'step': 20860, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:46.396272', 'step': 20860, 'epoch': 3} {'type': 'loss', 'content': 0.09236772358417511, 'timestamp': '2025-10-01 04:38:46.398359', 'step': 20861, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:46.428240', 'step': 20861, 'epoch': 3} {'type': 'loss', 'content': 0.09388226270675659, 'timestamp': '2025-10-01 04:38:46.430336', 'step': 20862, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:38:46.470168', 'step': 20862, 'epoch': 3} {'type': 'loss', 'content': 0.021561352536082268, 'timestamp': '2025-10-01 04:38:46.475890', 'step': 20863, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:46.512695', 'step': 20863, 'epoch': 3} {'type': 'loss', 'content': 0.048842981457710266, 'timestamp': '2025-10-01 04:38:46.538796', 'step': 20864, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:38:46.570246', 'step': 20864, 'epoch': 3} {'type': 'loss', 'content': 0.12402686476707458, 'timestamp': '2025-10-01 04:38:46.572291', 'step': 20865, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:38:46.602807', 'step': 20865, 'epoch': 3} {'type': 'loss', 'content': 0.04496823623776436, 'timestamp': '2025-10-01 04:38:46.605493', 'step': 20866, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:38:46.639401', 'step': 20866, 'epoch': 3} {'type': 'loss', 'content': 0.08558361232280731, 'timestamp': '2025-10-01 04:38:46.642036', 'step': 20867, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:46.673662', 'step': 20867, 'epoch': 3} {'type': 'loss', 'content': 0.1073816791176796, 'timestamp': '2025-10-01 04:38:46.699051', 'step': 20868, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:46.735630', 'step': 20868, 'epoch': 3} {'type': 'loss', 'content': 0.049298059195280075, 'timestamp': '2025-10-01 04:38:46.739055', 'step': 20869, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:46.774566', 'step': 20869, 'epoch': 3} {'type': 'loss', 'content': 0.06753037124872208, 'timestamp': '2025-10-01 04:38:46.780387', 'step': 20870, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:38:46.811277', 'step': 20870, 'epoch': 3} {'type': 'loss', 'content': 0.03540762886404991, 'timestamp': '2025-10-01 04:38:46.813994', 'step': 20871, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:46.843734', 'step': 20871, 'epoch': 3} {'type': 'loss', 'content': 0.08399613201618195, 'timestamp': '2025-10-01 04:38:46.867871', 'step': 20872, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:46.900387', 'step': 20872, 'epoch': 3} {'type': 'loss', 'content': 0.08963239938020706, 'timestamp': '2025-10-01 04:38:46.902578', 'step': 20873, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:46.942828', 'step': 20873, 'epoch': 3} {'type': 'loss', 'content': 0.08274991065263748, 'timestamp': '2025-10-01 04:38:46.944984', 'step': 20874, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:46.976505', 'step': 20874, 'epoch': 3} {'type': 'loss', 'content': 0.0772647112607956, 'timestamp': '2025-10-01 04:38:46.978479', 'step': 20875, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:47.009375', 'step': 20875, 'epoch': 3} {'type': 'loss', 'content': 0.038644444197416306, 'timestamp': '2025-10-01 04:38:47.033664', 'step': 20876, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:47.064497', 'step': 20876, 'epoch': 3} {'type': 'loss', 'content': 0.07107611000537872, 'timestamp': '2025-10-01 04:38:47.067882', 'step': 20877, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:47.104258', 'step': 20877, 'epoch': 3} {'type': 'loss', 'content': 0.06408587098121643, 'timestamp': '2025-10-01 04:38:47.106303', 'step': 20878, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:47.136778', 'step': 20878, 'epoch': 3} {'type': 'loss', 'content': 0.12038023769855499, 'timestamp': '2025-10-01 04:38:47.142367', 'step': 20879, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:47.174069', 'step': 20879, 'epoch': 3} {'type': 'loss', 'content': 0.042236313223838806, 'timestamp': '2025-10-01 04:38:47.198173', 'step': 20880, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:47.236906', 'step': 20880, 'epoch': 3} {'type': 'loss', 'content': 0.04544203728437424, 'timestamp': '2025-10-01 04:38:47.239032', 'step': 20881, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:47.268679', 'step': 20881, 'epoch': 3} {'type': 'loss', 'content': 0.04556815326213837, 'timestamp': '2025-10-01 04:38:47.270686', 'step': 20882, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:47.300636', 'step': 20882, 'epoch': 3} {'type': 'loss', 'content': 0.022599006071686745, 'timestamp': '2025-10-01 04:38:47.302763', 'step': 20883, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:47.334785', 'step': 20883, 'epoch': 3} {'type': 'loss', 'content': 0.01606873981654644, 'timestamp': '2025-10-01 04:38:47.358607', 'step': 20884, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:47.395040', 'step': 20884, 'epoch': 3} {'type': 'loss', 'content': 0.05474301055073738, 'timestamp': '2025-10-01 04:38:47.397323', 'step': 20885, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:47.427642', 'step': 20885, 'epoch': 3} {'type': 'loss', 'content': 0.05383668094873428, 'timestamp': '2025-10-01 04:38:47.429646', 'step': 20886, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:47.459603', 'step': 20886, 'epoch': 3} {'type': 'loss', 'content': 0.057156071066856384, 'timestamp': '2025-10-01 04:38:47.461570', 'step': 20887, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:38:47.492577', 'step': 20887, 'epoch': 3} {'type': 'loss', 'content': 0.06736034154891968, 'timestamp': '2025-10-01 04:38:47.516383', 'step': 20888, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:38:47.546949', 'step': 20888, 'epoch': 3} {'type': 'loss', 'content': 0.07889094203710556, 'timestamp': '2025-10-01 04:38:47.549249', 'step': 20889, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:47.578936', 'step': 20889, 'epoch': 3} {'type': 'loss', 'content': 0.07730865478515625, 'timestamp': '2025-10-01 04:38:47.581154', 'step': 20890, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:38:47.611659', 'step': 20890, 'epoch': 3} {'type': 'loss', 'content': 0.021633511409163475, 'timestamp': '2025-10-01 04:38:47.614516', 'step': 20891, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-10-01 04:38:47.644456', 'step': 20891, 'epoch': 3} {'type': 'loss', 'content': 0.03747710958123207, 'timestamp': '2025-10-01 04:38:47.672476', 'step': 20892, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:47.702819', 'step': 20892, 'epoch': 3} {'type': 'loss', 'content': 0.09674961864948273, 'timestamp': '2025-10-01 04:38:47.713674', 'step': 20893, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:47.744082', 'step': 20893, 'epoch': 3} {'type': 'loss', 'content': 0.049644459038972855, 'timestamp': '2025-10-01 04:38:47.746056', 'step': 20894, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:47.776576', 'step': 20894, 'epoch': 3} {'type': 'loss', 'content': 0.09360311180353165, 'timestamp': '2025-10-01 04:38:47.778595', 'step': 20895, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:47.809142', 'step': 20895, 'epoch': 3} {'type': 'loss', 'content': 0.0689510703086853, 'timestamp': '2025-10-01 04:38:47.844170', 'step': 20896, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:47.875256', 'step': 20896, 'epoch': 3} {'type': 'loss', 'content': 0.0780985951423645, 'timestamp': '2025-10-01 04:38:47.877383', 'step': 20897, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:47.907929', 'step': 20897, 'epoch': 3} {'type': 'loss', 'content': 0.10434377193450928, 'timestamp': '2025-10-01 04:38:47.910315', 'step': 20898, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:38:47.940059', 'step': 20898, 'epoch': 3} {'type': 'loss', 'content': 0.08537853509187698, 'timestamp': '2025-10-01 04:38:47.942658', 'step': 20899, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-10-01 04:38:47.972877', 'step': 20899, 'epoch': 3} {'type': 'loss', 'content': 0.07454925030469894, 'timestamp': '2025-10-01 04:38:47.998611', 'step': 20900, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:48.044192', 'step': 20900, 'epoch': 3} {'type': 'loss', 'content': 0.06767380982637405, 'timestamp': '2025-10-01 04:38:48.046234', 'step': 20901, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:48.080020', 'step': 20901, 'epoch': 3} {'type': 'loss', 'content': 0.06692658364772797, 'timestamp': '2025-10-01 04:38:48.082021', 'step': 20902, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:48.111892', 'step': 20902, 'epoch': 3} {'type': 'loss', 'content': 0.07329492270946503, 'timestamp': '2025-10-01 04:38:48.114230', 'step': 20903, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:38:48.143900', 'step': 20903, 'epoch': 3} {'type': 'loss', 'content': 0.10042746365070343, 'timestamp': '2025-10-01 04:38:48.168914', 'step': 20904, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:48.199385', 'step': 20904, 'epoch': 3} {'type': 'loss', 'content': 0.06922043114900589, 'timestamp': '2025-10-01 04:38:48.201843', 'step': 20905, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:48.231630', 'step': 20905, 'epoch': 3} {'type': 'loss', 'content': 0.09944292157888412, 'timestamp': '2025-10-01 04:38:48.233898', 'step': 20906, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:48.264263', 'step': 20906, 'epoch': 3} {'type': 'loss', 'content': 0.01473893690854311, 'timestamp': '2025-10-01 04:38:48.266557', 'step': 20907, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:38:48.297988', 'step': 20907, 'epoch': 3} {'type': 'loss', 'content': 0.05968150869011879, 'timestamp': '2025-10-01 04:38:48.328176', 'step': 20908, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:48.359935', 'step': 20908, 'epoch': 3} {'type': 'loss', 'content': 0.025252755731344223, 'timestamp': '2025-10-01 04:38:48.362686', 'step': 20909, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:48.396223', 'step': 20909, 'epoch': 3} {'type': 'loss', 'content': 0.11342250555753708, 'timestamp': '2025-10-01 04:38:48.398509', 'step': 20910, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:48.429061', 'step': 20910, 'epoch': 3} {'type': 'loss', 'content': 0.05326145142316818, 'timestamp': '2025-10-01 04:38:48.431095', 'step': 20911, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:48.461130', 'step': 20911, 'epoch': 3} {'type': 'loss', 'content': 0.03521137312054634, 'timestamp': '2025-10-01 04:38:48.484916', 'step': 20912, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:48.516635', 'step': 20912, 'epoch': 3} {'type': 'loss', 'content': 0.04774566367268562, 'timestamp': '2025-10-01 04:38:48.519243', 'step': 20913, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:48.549299', 'step': 20913, 'epoch': 3} {'type': 'loss', 'content': 0.1193232536315918, 'timestamp': '2025-10-01 04:38:48.553461', 'step': 20914, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:48.584093', 'step': 20914, 'epoch': 3} {'type': 'loss', 'content': 0.11013687402009964, 'timestamp': '2025-10-01 04:38:48.586744', 'step': 20915, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:48.617070', 'step': 20915, 'epoch': 3} {'type': 'loss', 'content': 0.12746885418891907, 'timestamp': '2025-10-01 04:38:48.641625', 'step': 20916, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:48.671676', 'step': 20916, 'epoch': 3} {'type': 'loss', 'content': 0.1586524248123169, 'timestamp': '2025-10-01 04:38:48.673949', 'step': 20917, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:48.704433', 'step': 20917, 'epoch': 3} {'type': 'loss', 'content': 0.07349018007516861, 'timestamp': '2025-10-01 04:38:48.717241', 'step': 20918, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:48.748644', 'step': 20918, 'epoch': 3} {'type': 'loss', 'content': 0.1200091764330864, 'timestamp': '2025-10-01 04:38:48.751016', 'step': 20919, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:48.780876', 'step': 20919, 'epoch': 3} {'type': 'loss', 'content': 0.14511655271053314, 'timestamp': '2025-10-01 04:38:48.804566', 'step': 20920, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:48.837345', 'step': 20920, 'epoch': 3} {'type': 'loss', 'content': 0.05603114515542984, 'timestamp': '2025-10-01 04:38:48.845056', 'step': 20921, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:48.875292', 'step': 20921, 'epoch': 3} {'type': 'loss', 'content': 0.07359626889228821, 'timestamp': '2025-10-01 04:38:48.877724', 'step': 20922, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:48.908077', 'step': 20922, 'epoch': 3} {'type': 'loss', 'content': 0.06584116816520691, 'timestamp': '2025-10-01 04:38:48.910255', 'step': 20923, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:48.951520', 'step': 20923, 'epoch': 3} {'type': 'loss', 'content': 0.050523851066827774, 'timestamp': '2025-10-01 04:38:48.983282', 'step': 20924, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:49.018546', 'step': 20924, 'epoch': 3} {'type': 'loss', 'content': 0.10294496268033981, 'timestamp': '2025-10-01 04:38:49.020684', 'step': 20925, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:49.050531', 'step': 20925, 'epoch': 3} {'type': 'loss', 'content': 0.049488842487335205, 'timestamp': '2025-10-01 04:38:49.053305', 'step': 20926, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:49.084211', 'step': 20926, 'epoch': 3} {'type': 'loss', 'content': 0.09902196377515793, 'timestamp': '2025-10-01 04:38:49.087550', 'step': 20927, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:38:49.117875', 'step': 20927, 'epoch': 3} {'type': 'loss', 'content': 0.03807072713971138, 'timestamp': '2025-10-01 04:38:49.142016', 'step': 20928, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:49.172831', 'step': 20928, 'epoch': 3} {'type': 'loss', 'content': 0.053814131766557693, 'timestamp': '2025-10-01 04:38:49.175478', 'step': 20929, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:49.206618', 'step': 20929, 'epoch': 3} {'type': 'loss', 'content': 0.0834936797618866, 'timestamp': '2025-10-01 04:38:49.208664', 'step': 20930, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:49.238832', 'step': 20930, 'epoch': 3} {'type': 'loss', 'content': 0.09479024261236191, 'timestamp': '2025-10-01 04:38:49.247301', 'step': 20931, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:49.277752', 'step': 20931, 'epoch': 3} {'type': 'loss', 'content': 0.03236185759305954, 'timestamp': '2025-10-01 04:38:49.301359', 'step': 20932, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:38:49.331625', 'step': 20932, 'epoch': 3} {'type': 'loss', 'content': 0.14300188422203064, 'timestamp': '2025-10-01 04:38:49.336131', 'step': 20933, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:49.366359', 'step': 20933, 'epoch': 3} {'type': 'loss', 'content': 0.05034717172384262, 'timestamp': '2025-10-01 04:38:49.368808', 'step': 20934, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:38:49.399976', 'step': 20934, 'epoch': 3} {'type': 'loss', 'content': 0.03224143013358116, 'timestamp': '2025-10-01 04:38:49.404235', 'step': 20935, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:49.435004', 'step': 20935, 'epoch': 3} {'type': 'loss', 'content': 0.09427136182785034, 'timestamp': '2025-10-01 04:38:49.459347', 'step': 20936, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:49.490766', 'step': 20936, 'epoch': 3} {'type': 'loss', 'content': 0.05024520307779312, 'timestamp': '2025-10-01 04:38:49.493051', 'step': 20937, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:49.524186', 'step': 20937, 'epoch': 3} {'type': 'loss', 'content': 0.038370296359062195, 'timestamp': '2025-10-01 04:38:49.528607', 'step': 20938, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:49.565800', 'step': 20938, 'epoch': 3} {'type': 'loss', 'content': 0.024037091061472893, 'timestamp': '2025-10-01 04:38:49.568036', 'step': 20939, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:49.600358', 'step': 20939, 'epoch': 3} {'type': 'loss', 'content': 0.113863006234169, 'timestamp': '2025-10-01 04:38:49.623978', 'step': 20940, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:49.654480', 'step': 20940, 'epoch': 3} {'type': 'loss', 'content': 0.018137454986572266, 'timestamp': '2025-10-01 04:38:49.656751', 'step': 20941, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:49.687008', 'step': 20941, 'epoch': 3} {'type': 'loss', 'content': 0.05329897254705429, 'timestamp': '2025-10-01 04:38:49.689648', 'step': 20942, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:49.721013', 'step': 20942, 'epoch': 3} {'type': 'loss', 'content': 0.05242171511054039, 'timestamp': '2025-10-01 04:38:49.723304', 'step': 20943, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:49.754117', 'step': 20943, 'epoch': 3} {'type': 'loss', 'content': 0.12414933741092682, 'timestamp': '2025-10-01 04:38:49.786931', 'step': 20944, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:49.819391', 'step': 20944, 'epoch': 3} {'type': 'loss', 'content': 0.03465624898672104, 'timestamp': '2025-10-01 04:38:49.821577', 'step': 20945, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:49.863124', 'step': 20945, 'epoch': 3} {'type': 'loss', 'content': 0.060371749103069305, 'timestamp': '2025-10-01 04:38:49.865311', 'step': 20946, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:49.897524', 'step': 20946, 'epoch': 3} {'type': 'loss', 'content': 0.027177080512046814, 'timestamp': '2025-10-01 04:38:49.899766', 'step': 20947, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:49.930178', 'step': 20947, 'epoch': 3} {'type': 'loss', 'content': 0.07610002905130386, 'timestamp': '2025-10-01 04:38:49.954228', 'step': 20948, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:49.985500', 'step': 20948, 'epoch': 3} {'type': 'loss', 'content': 0.0853809267282486, 'timestamp': '2025-10-01 04:38:49.987979', 'step': 20949, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:50.019843', 'step': 20949, 'epoch': 3} {'type': 'loss', 'content': 0.09710663557052612, 'timestamp': '2025-10-01 04:38:50.021711', 'step': 20950, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:50.052535', 'step': 20950, 'epoch': 3} {'type': 'loss', 'content': 0.06082586571574211, 'timestamp': '2025-10-01 04:38:50.057907', 'step': 20951, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:50.089265', 'step': 20951, 'epoch': 3} {'type': 'loss', 'content': 0.07973383367061615, 'timestamp': '2025-10-01 04:38:50.112968', 'step': 20952, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:50.155675', 'step': 20952, 'epoch': 3} {'type': 'loss', 'content': 0.08099964261054993, 'timestamp': '2025-10-01 04:38:50.157675', 'step': 20953, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:50.188963', 'step': 20953, 'epoch': 3} {'type': 'loss', 'content': 0.08496596664190292, 'timestamp': '2025-10-01 04:38:50.191049', 'step': 20954, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:38:50.222434', 'step': 20954, 'epoch': 3} {'type': 'loss', 'content': 0.11584527045488358, 'timestamp': '2025-10-01 04:38:50.224963', 'step': 20955, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:50.256849', 'step': 20955, 'epoch': 3} {'type': 'loss', 'content': 0.1282881647348404, 'timestamp': '2025-10-01 04:38:50.280373', 'step': 20956, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:50.310772', 'step': 20956, 'epoch': 3} {'type': 'loss', 'content': 0.02073364332318306, 'timestamp': '2025-10-01 04:38:50.312940', 'step': 20957, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:50.344024', 'step': 20957, 'epoch': 3} {'type': 'loss', 'content': 0.06158396601676941, 'timestamp': '2025-10-01 04:38:50.346212', 'step': 20958, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:50.377585', 'step': 20958, 'epoch': 3} {'type': 'loss', 'content': 0.08606422692537308, 'timestamp': '2025-10-01 04:38:50.379878', 'step': 20959, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:50.410434', 'step': 20959, 'epoch': 3} {'type': 'loss', 'content': 0.06170908734202385, 'timestamp': '2025-10-01 04:38:50.436409', 'step': 20960, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:50.468030', 'step': 20960, 'epoch': 3} {'type': 'loss', 'content': 0.10852085053920746, 'timestamp': '2025-10-01 04:38:50.470128', 'step': 20961, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:50.501349', 'step': 20961, 'epoch': 3} {'type': 'loss', 'content': 0.00797712616622448, 'timestamp': '2025-10-01 04:38:50.503500', 'step': 20962, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:50.533970', 'step': 20962, 'epoch': 3} {'type': 'loss', 'content': 0.05526581034064293, 'timestamp': '2025-10-01 04:38:50.536014', 'step': 20963, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:50.566048', 'step': 20963, 'epoch': 3} {'type': 'loss', 'content': 0.0257329773157835, 'timestamp': '2025-10-01 04:38:50.589525', 'step': 20964, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:50.619671', 'step': 20964, 'epoch': 3} {'type': 'loss', 'content': 0.08908706158399582, 'timestamp': '2025-10-01 04:38:50.621716', 'step': 20965, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:50.656390', 'step': 20965, 'epoch': 3} {'type': 'loss', 'content': 0.07277899235486984, 'timestamp': '2025-10-01 04:38:50.658895', 'step': 20966, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:50.690650', 'step': 20966, 'epoch': 3} {'type': 'loss', 'content': 0.049825120717287064, 'timestamp': '2025-10-01 04:38:50.693338', 'step': 20967, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:50.723712', 'step': 20967, 'epoch': 3} {'type': 'loss', 'content': 0.030565598979592323, 'timestamp': '2025-10-01 04:38:50.756159', 'step': 20968, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:50.786683', 'step': 20968, 'epoch': 3} {'type': 'loss', 'content': 0.045814573764801025, 'timestamp': '2025-10-01 04:38:50.789386', 'step': 20969, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:50.820677', 'step': 20969, 'epoch': 3} {'type': 'loss', 'content': 0.02760639414191246, 'timestamp': '2025-10-01 04:38:50.823050', 'step': 20970, 'epoch': 3} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 949202279808}], 'timestamp': '2025-10-01 04:38:59.252647', 'step': 20970, 'epoch': 3} {'type': 'pplx', 'content': 9468.008634321075, 'timestamp': '2025-10-01 04:38:59.255308', 'step': 20970, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:38:59.285219', 'step': 20970, 'epoch': 3} {'type': 'loss', 'content': 0.10978171974420547, 'timestamp': '2025-10-01 04:38:59.287472', 'step': 20971, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:59.319587', 'step': 20971, 'epoch': 3} {'type': 'loss', 'content': 0.053028371185064316, 'timestamp': '2025-10-01 04:38:59.343494', 'step': 20972, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:38:59.374676', 'step': 20972, 'epoch': 3} {'type': 'loss', 'content': 0.031032657250761986, 'timestamp': '2025-10-01 04:38:59.377281', 'step': 20973, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:59.408137', 'step': 20973, 'epoch': 3} {'type': 'loss', 'content': 0.07768727838993073, 'timestamp': '2025-10-01 04:38:59.410200', 'step': 20974, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:59.443021', 'step': 20974, 'epoch': 3} {'type': 'loss', 'content': 0.0908217802643776, 'timestamp': '2025-10-01 04:38:59.445202', 'step': 20975, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:59.475509', 'step': 20975, 'epoch': 3} {'type': 'loss', 'content': 0.048123545944690704, 'timestamp': '2025-10-01 04:38:59.503839', 'step': 20976, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:59.534170', 'step': 20976, 'epoch': 3} {'type': 'loss', 'content': 0.04149094969034195, 'timestamp': '2025-10-01 04:38:59.536208', 'step': 20977, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:38:59.569270', 'step': 20977, 'epoch': 3} {'type': 'loss', 'content': 0.08974611014127731, 'timestamp': '2025-10-01 04:38:59.571305', 'step': 20978, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:59.610420', 'step': 20978, 'epoch': 3} {'type': 'loss', 'content': 0.1554568111896515, 'timestamp': '2025-10-01 04:38:59.612496', 'step': 20979, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:59.644566', 'step': 20979, 'epoch': 3} {'type': 'loss', 'content': 0.07269831001758575, 'timestamp': '2025-10-01 04:38:59.668260', 'step': 20980, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:59.704855', 'step': 20980, 'epoch': 3} {'type': 'loss', 'content': 0.09915722161531448, 'timestamp': '2025-10-01 04:38:59.707012', 'step': 20981, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:38:59.738789', 'step': 20981, 'epoch': 3} {'type': 'loss', 'content': 0.09335177391767502, 'timestamp': '2025-10-01 04:38:59.741532', 'step': 20982, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:59.772583', 'step': 20982, 'epoch': 3} {'type': 'loss', 'content': 0.014219499193131924, 'timestamp': '2025-10-01 04:38:59.774629', 'step': 20983, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:59.806049', 'step': 20983, 'epoch': 3} {'type': 'loss', 'content': 0.06122632324695587, 'timestamp': '2025-10-01 04:38:59.829344', 'step': 20984, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:59.860675', 'step': 20984, 'epoch': 3} {'type': 'loss', 'content': 0.08548759669065475, 'timestamp': '2025-10-01 04:38:59.862697', 'step': 20985, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:59.895221', 'step': 20985, 'epoch': 3} {'type': 'loss', 'content': 0.03814087063074112, 'timestamp': '2025-10-01 04:38:59.901200', 'step': 20986, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:38:59.951500', 'step': 20986, 'epoch': 3} {'type': 'loss', 'content': 0.07387326657772064, 'timestamp': '2025-10-01 04:38:59.953575', 'step': 20987, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:38:59.990928', 'step': 20987, 'epoch': 3} {'type': 'loss', 'content': 0.07317931205034256, 'timestamp': '2025-10-01 04:39:00.014422', 'step': 20988, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:39:00.053182', 'step': 20988, 'epoch': 3} {'type': 'loss', 'content': 0.04223407804965973, 'timestamp': '2025-10-01 04:39:00.055227', 'step': 20989, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:00.085566', 'step': 20989, 'epoch': 3} {'type': 'loss', 'content': 0.02818903513252735, 'timestamp': '2025-10-01 04:39:00.087566', 'step': 20990, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:00.118403', 'step': 20990, 'epoch': 3} {'type': 'loss', 'content': 0.11970100551843643, 'timestamp': '2025-10-01 04:39:00.120498', 'step': 20991, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:00.151775', 'step': 20991, 'epoch': 3} {'type': 'loss', 'content': 0.04313303530216217, 'timestamp': '2025-10-01 04:39:00.175319', 'step': 20992, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:00.215246', 'step': 20992, 'epoch': 3} {'type': 'loss', 'content': 0.06774447858333588, 'timestamp': '2025-10-01 04:39:00.217239', 'step': 20993, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:00.258465', 'step': 20993, 'epoch': 3} {'type': 'loss', 'content': 0.07563702762126923, 'timestamp': '2025-10-01 04:39:00.260625', 'step': 20994, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:00.293257', 'step': 20994, 'epoch': 3} {'type': 'loss', 'content': 0.06853844970464706, 'timestamp': '2025-10-01 04:39:00.295823', 'step': 20995, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:39:00.329044', 'step': 20995, 'epoch': 3} {'type': 'loss', 'content': 0.08269347250461578, 'timestamp': '2025-10-01 04:39:00.353069', 'step': 20996, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:00.383843', 'step': 20996, 'epoch': 3} {'type': 'loss', 'content': 0.02057499811053276, 'timestamp': '2025-10-01 04:39:00.385940', 'step': 20997, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:00.416036', 'step': 20997, 'epoch': 3} {'type': 'loss', 'content': 0.0503617525100708, 'timestamp': '2025-10-01 04:39:00.421393', 'step': 20998, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:00.452002', 'step': 20998, 'epoch': 3} {'type': 'loss', 'content': 0.052171576768159866, 'timestamp': '2025-10-01 04:39:00.457939', 'step': 20999, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:00.493679', 'step': 20999, 'epoch': 3} {'type': 'loss', 'content': 0.06937861442565918, 'timestamp': '2025-10-01 04:39:00.526767', 'step': 21000, 'epoch': 3} {'type': 'info', 'content': 'Checkpoint saved at step 21000', 'timestamp': '2025-10-01 04:39:05.301609', 'step': 21000, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:39:05.349447', 'step': 21000, 'epoch': 3} {'type': 'loss', 'content': 0.09670506417751312, 'timestamp': '2025-10-01 04:39:05.351502', 'step': 21001, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:05.383708', 'step': 21001, 'epoch': 3} {'type': 'loss', 'content': 0.040733374655246735, 'timestamp': '2025-10-01 04:39:05.386792', 'step': 21002, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:05.421578', 'step': 21002, 'epoch': 3} {'type': 'loss', 'content': 0.10298775136470795, 'timestamp': '2025-10-01 04:39:05.424157', 'step': 21003, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:05.466870', 'step': 21003, 'epoch': 3} {'type': 'loss', 'content': 0.05213841423392296, 'timestamp': '2025-10-01 04:39:05.490545', 'step': 21004, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:05.521085', 'step': 21004, 'epoch': 3} {'type': 'loss', 'content': 0.0662025436758995, 'timestamp': '2025-10-01 04:39:05.523340', 'step': 21005, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:05.554804', 'step': 21005, 'epoch': 3} {'type': 'loss', 'content': 0.09347748756408691, 'timestamp': '2025-10-01 04:39:05.558181', 'step': 21006, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:05.588798', 'step': 21006, 'epoch': 3} {'type': 'loss', 'content': 0.06526096910238266, 'timestamp': '2025-10-01 04:39:05.591378', 'step': 21007, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:05.622033', 'step': 21007, 'epoch': 3} {'type': 'loss', 'content': 0.07597387582063675, 'timestamp': '2025-10-01 04:39:05.647131', 'step': 21008, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:05.693338', 'step': 21008, 'epoch': 3} {'type': 'loss', 'content': 0.031211599707603455, 'timestamp': '2025-10-01 04:39:05.703863', 'step': 21009, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:05.742096', 'step': 21009, 'epoch': 3} {'type': 'loss', 'content': 0.05157637223601341, 'timestamp': '2025-10-01 04:39:05.743908', 'step': 21010, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:05.774144', 'step': 21010, 'epoch': 3} {'type': 'loss', 'content': 0.06774302572011948, 'timestamp': '2025-10-01 04:39:05.776254', 'step': 21011, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:05.807264', 'step': 21011, 'epoch': 3} {'type': 'loss', 'content': 0.08611185848712921, 'timestamp': '2025-10-01 04:39:05.830714', 'step': 21012, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:05.872008', 'step': 21012, 'epoch': 3} {'type': 'loss', 'content': 0.07082794606685638, 'timestamp': '2025-10-01 04:39:05.875010', 'step': 21013, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:05.921546', 'step': 21013, 'epoch': 3} {'type': 'loss', 'content': 0.0617128349840641, 'timestamp': '2025-10-01 04:39:05.924036', 'step': 21014, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:05.954144', 'step': 21014, 'epoch': 3} {'type': 'loss', 'content': 0.11630979925394058, 'timestamp': '2025-10-01 04:39:05.956274', 'step': 21015, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:05.997856', 'step': 21015, 'epoch': 3} {'type': 'loss', 'content': 0.09194715321063995, 'timestamp': '2025-10-01 04:39:06.021564', 'step': 21016, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:06.052504', 'step': 21016, 'epoch': 3} {'type': 'loss', 'content': 0.07675177603960037, 'timestamp': '2025-10-01 04:39:06.054493', 'step': 21017, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:06.084265', 'step': 21017, 'epoch': 3} {'type': 'loss', 'content': 0.07873672246932983, 'timestamp': '2025-10-01 04:39:06.086314', 'step': 21018, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:06.116689', 'step': 21018, 'epoch': 3} {'type': 'loss', 'content': 0.08296910673379898, 'timestamp': '2025-10-01 04:39:06.118573', 'step': 21019, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:39:06.149367', 'step': 21019, 'epoch': 3} {'type': 'loss', 'content': 0.04182823747396469, 'timestamp': '2025-10-01 04:39:06.172660', 'step': 21020, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:06.212763', 'step': 21020, 'epoch': 3} {'type': 'loss', 'content': 0.06343589723110199, 'timestamp': '2025-10-01 04:39:06.214801', 'step': 21021, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:06.244965', 'step': 21021, 'epoch': 3} {'type': 'loss', 'content': 0.018877826631069183, 'timestamp': '2025-10-01 04:39:06.248915', 'step': 21022, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:06.279675', 'step': 21022, 'epoch': 3} {'type': 'loss', 'content': 0.060787003487348557, 'timestamp': '2025-10-01 04:39:06.281751', 'step': 21023, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:39:06.312862', 'step': 21023, 'epoch': 3} {'type': 'loss', 'content': 0.028953442350029945, 'timestamp': '2025-10-01 04:39:06.336695', 'step': 21024, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:06.367794', 'step': 21024, 'epoch': 3} {'type': 'loss', 'content': 0.052967239171266556, 'timestamp': '2025-10-01 04:39:06.370147', 'step': 21025, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:06.400386', 'step': 21025, 'epoch': 3} {'type': 'loss', 'content': 0.10877209156751633, 'timestamp': '2025-10-01 04:39:06.403689', 'step': 21026, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:06.439118', 'step': 21026, 'epoch': 3} {'type': 'loss', 'content': 0.13295556604862213, 'timestamp': '2025-10-01 04:39:06.441201', 'step': 21027, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:06.473537', 'step': 21027, 'epoch': 3} {'type': 'loss', 'content': 0.05915752425789833, 'timestamp': '2025-10-01 04:39:06.498891', 'step': 21028, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:06.538856', 'step': 21028, 'epoch': 3} {'type': 'loss', 'content': 0.06976456195116043, 'timestamp': '2025-10-01 04:39:06.540768', 'step': 21029, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:06.571625', 'step': 21029, 'epoch': 3} {'type': 'loss', 'content': 0.06079944595694542, 'timestamp': '2025-10-01 04:39:06.573805', 'step': 21030, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:06.607129', 'step': 21030, 'epoch': 3} {'type': 'loss', 'content': 0.07456959038972855, 'timestamp': '2025-10-01 04:39:06.609072', 'step': 21031, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:06.638928', 'step': 21031, 'epoch': 3} {'type': 'loss', 'content': 0.10125185549259186, 'timestamp': '2025-10-01 04:39:06.662298', 'step': 21032, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:06.704301', 'step': 21032, 'epoch': 3} {'type': 'loss', 'content': 0.06647316366434097, 'timestamp': '2025-10-01 04:39:06.706477', 'step': 21033, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:06.736997', 'step': 21033, 'epoch': 3} {'type': 'loss', 'content': 0.09335219115018845, 'timestamp': '2025-10-01 04:39:06.739084', 'step': 21034, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:06.771272', 'step': 21034, 'epoch': 3} {'type': 'loss', 'content': 0.07271783798933029, 'timestamp': '2025-10-01 04:39:06.773634', 'step': 21035, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:06.806184', 'step': 21035, 'epoch': 3} {'type': 'loss', 'content': 0.08238863199949265, 'timestamp': '2025-10-01 04:39:06.830157', 'step': 21036, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:06.862856', 'step': 21036, 'epoch': 3} {'type': 'loss', 'content': 0.05839432403445244, 'timestamp': '2025-10-01 04:39:06.865086', 'step': 21037, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:39:06.896846', 'step': 21037, 'epoch': 3} {'type': 'loss', 'content': 0.044055789709091187, 'timestamp': '2025-10-01 04:39:06.899074', 'step': 21038, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:39:06.931243', 'step': 21038, 'epoch': 3} {'type': 'loss', 'content': 0.0401824489235878, 'timestamp': '2025-10-01 04:39:06.933758', 'step': 21039, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:06.966303', 'step': 21039, 'epoch': 3} {'type': 'loss', 'content': 0.0898362398147583, 'timestamp': '2025-10-01 04:39:06.989865', 'step': 21040, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:07.021678', 'step': 21040, 'epoch': 3} {'type': 'loss', 'content': 0.053331587463617325, 'timestamp': '2025-10-01 04:39:07.023594', 'step': 21041, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:07.053394', 'step': 21041, 'epoch': 3} {'type': 'loss', 'content': 0.09973360598087311, 'timestamp': '2025-10-01 04:39:07.055134', 'step': 21042, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:07.087626', 'step': 21042, 'epoch': 3} {'type': 'loss', 'content': 0.0643785297870636, 'timestamp': '2025-10-01 04:39:07.089781', 'step': 21043, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:07.131746', 'step': 21043, 'epoch': 3} {'type': 'loss', 'content': 0.05933245271444321, 'timestamp': '2025-10-01 04:39:07.155861', 'step': 21044, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:07.185800', 'step': 21044, 'epoch': 3} {'type': 'loss', 'content': 0.08751214295625687, 'timestamp': '2025-10-01 04:39:07.187764', 'step': 21045, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:07.220132', 'step': 21045, 'epoch': 3} {'type': 'loss', 'content': 0.10889625549316406, 'timestamp': '2025-10-01 04:39:07.222163', 'step': 21046, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:39:07.254114', 'step': 21046, 'epoch': 3} {'type': 'loss', 'content': 0.03469351306557655, 'timestamp': '2025-10-01 04:39:07.257253', 'step': 21047, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:39:07.289599', 'step': 21047, 'epoch': 3} {'type': 'loss', 'content': 0.08766410499811172, 'timestamp': '2025-10-01 04:39:07.313298', 'step': 21048, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:07.346674', 'step': 21048, 'epoch': 3} {'type': 'loss', 'content': 0.05873143672943115, 'timestamp': '2025-10-01 04:39:07.348534', 'step': 21049, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:07.380813', 'step': 21049, 'epoch': 3} {'type': 'loss', 'content': 0.048059213906526566, 'timestamp': '2025-10-01 04:39:07.382908', 'step': 21050, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:07.413680', 'step': 21050, 'epoch': 3} {'type': 'loss', 'content': 0.06950289756059647, 'timestamp': '2025-10-01 04:39:07.415485', 'step': 21051, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:07.456292', 'step': 21051, 'epoch': 3} {'type': 'loss', 'content': 0.08358482271432877, 'timestamp': '2025-10-01 04:39:07.479587', 'step': 21052, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:07.511214', 'step': 21052, 'epoch': 3} {'type': 'loss', 'content': 0.08436230570077896, 'timestamp': '2025-10-01 04:39:07.516009', 'step': 21053, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:39:07.547619', 'step': 21053, 'epoch': 3} {'type': 'loss', 'content': 0.06485965847969055, 'timestamp': '2025-10-01 04:39:07.550042', 'step': 21054, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:07.581458', 'step': 21054, 'epoch': 3} {'type': 'loss', 'content': 0.13341563940048218, 'timestamp': '2025-10-01 04:39:07.583557', 'step': 21055, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:07.617088', 'step': 21055, 'epoch': 3} {'type': 'loss', 'content': 0.041630975902080536, 'timestamp': '2025-10-01 04:39:07.641099', 'step': 21056, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:07.672678', 'step': 21056, 'epoch': 3} {'type': 'loss', 'content': 0.08103561401367188, 'timestamp': '2025-10-01 04:39:07.674804', 'step': 21057, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:07.707667', 'step': 21057, 'epoch': 3} {'type': 'loss', 'content': 0.1773689091205597, 'timestamp': '2025-10-01 04:39:07.709927', 'step': 21058, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:07.742480', 'step': 21058, 'epoch': 3} {'type': 'loss', 'content': 0.10060558468103409, 'timestamp': '2025-10-01 04:39:07.745099', 'step': 21059, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:07.777458', 'step': 21059, 'epoch': 3} {'type': 'loss', 'content': 0.02436560019850731, 'timestamp': '2025-10-01 04:39:07.801469', 'step': 21060, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:07.834546', 'step': 21060, 'epoch': 3} {'type': 'loss', 'content': 0.023530371487140656, 'timestamp': '2025-10-01 04:39:07.836775', 'step': 21061, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:07.867176', 'step': 21061, 'epoch': 3} {'type': 'loss', 'content': 0.08600407838821411, 'timestamp': '2025-10-01 04:39:07.870447', 'step': 21062, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:07.912332', 'step': 21062, 'epoch': 3} {'type': 'loss', 'content': 0.047129400074481964, 'timestamp': '2025-10-01 04:39:07.914309', 'step': 21063, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:07.945396', 'step': 21063, 'epoch': 3} {'type': 'loss', 'content': 0.0730457752943039, 'timestamp': '2025-10-01 04:39:07.969077', 'step': 21064, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:07.999200', 'step': 21064, 'epoch': 3} {'type': 'loss', 'content': 0.14199858903884888, 'timestamp': '2025-10-01 04:39:08.001629', 'step': 21065, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:08.032572', 'step': 21065, 'epoch': 3} {'type': 'loss', 'content': 0.038055263459682465, 'timestamp': '2025-10-01 04:39:08.035251', 'step': 21066, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:08.066008', 'step': 21066, 'epoch': 3} {'type': 'loss', 'content': 0.09009812027215958, 'timestamp': '2025-10-01 04:39:08.068670', 'step': 21067, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:08.100273', 'step': 21067, 'epoch': 3} {'type': 'loss', 'content': 0.07461755722761154, 'timestamp': '2025-10-01 04:39:08.123722', 'step': 21068, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:08.154860', 'step': 21068, 'epoch': 3} {'type': 'loss', 'content': 0.08255297690629959, 'timestamp': '2025-10-01 04:39:08.158497', 'step': 21069, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:08.193230', 'step': 21069, 'epoch': 3} {'type': 'loss', 'content': 0.0791301503777504, 'timestamp': '2025-10-01 04:39:08.195613', 'step': 21070, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:08.226202', 'step': 21070, 'epoch': 3} {'type': 'loss', 'content': 0.04212307184934616, 'timestamp': '2025-10-01 04:39:08.241745', 'step': 21071, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:08.273165', 'step': 21071, 'epoch': 3} {'type': 'loss', 'content': 0.045668747276067734, 'timestamp': '2025-10-01 04:39:08.296998', 'step': 21072, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:08.328012', 'step': 21072, 'epoch': 3} {'type': 'loss', 'content': 0.07931112498044968, 'timestamp': '2025-10-01 04:39:08.330923', 'step': 21073, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:08.367440', 'step': 21073, 'epoch': 3} {'type': 'loss', 'content': 0.042361028492450714, 'timestamp': '2025-10-01 04:39:08.369993', 'step': 21074, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:08.400720', 'step': 21074, 'epoch': 3} {'type': 'loss', 'content': 0.06418583542108536, 'timestamp': '2025-10-01 04:39:08.402979', 'step': 21075, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:08.442502', 'step': 21075, 'epoch': 3} {'type': 'loss', 'content': 0.12249733507633209, 'timestamp': '2025-10-01 04:39:08.465856', 'step': 21076, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:39:08.496629', 'step': 21076, 'epoch': 3} {'type': 'loss', 'content': 0.05627141520380974, 'timestamp': '2025-10-01 04:39:08.499135', 'step': 21077, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:08.530260', 'step': 21077, 'epoch': 3} {'type': 'loss', 'content': 0.13517044484615326, 'timestamp': '2025-10-01 04:39:08.532774', 'step': 21078, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:08.572710', 'step': 21078, 'epoch': 3} {'type': 'loss', 'content': 0.08882944285869598, 'timestamp': '2025-10-01 04:39:08.575187', 'step': 21079, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:08.606106', 'step': 21079, 'epoch': 3} {'type': 'loss', 'content': 0.03884420916438103, 'timestamp': '2025-10-01 04:39:08.631558', 'step': 21080, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:08.665353', 'step': 21080, 'epoch': 3} {'type': 'loss', 'content': 0.04812697321176529, 'timestamp': '2025-10-01 04:39:08.668164', 'step': 21081, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:08.710931', 'step': 21081, 'epoch': 3} {'type': 'loss', 'content': 0.08444913476705551, 'timestamp': '2025-10-01 04:39:08.721320', 'step': 21082, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:08.763681', 'step': 21082, 'epoch': 3} {'type': 'loss', 'content': 0.041542623192071915, 'timestamp': '2025-10-01 04:39:08.766877', 'step': 21083, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:08.800549', 'step': 21083, 'epoch': 3} {'type': 'loss', 'content': 0.08978662639856339, 'timestamp': '2025-10-01 04:39:08.824127', 'step': 21084, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:08.858851', 'step': 21084, 'epoch': 3} {'type': 'loss', 'content': 0.1641847789287567, 'timestamp': '2025-10-01 04:39:08.861517', 'step': 21085, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:08.892265', 'step': 21085, 'epoch': 3} {'type': 'loss', 'content': 0.05120009928941727, 'timestamp': '2025-10-01 04:39:08.894274', 'step': 21086, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:08.924574', 'step': 21086, 'epoch': 3} {'type': 'loss', 'content': 0.056919559836387634, 'timestamp': '2025-10-01 04:39:08.926894', 'step': 21087, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:08.957882', 'step': 21087, 'epoch': 3} {'type': 'loss', 'content': 0.04556962475180626, 'timestamp': '2025-10-01 04:39:08.982071', 'step': 21088, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:09.013551', 'step': 21088, 'epoch': 3} {'type': 'loss', 'content': 0.07531271129846573, 'timestamp': '2025-10-01 04:39:09.016003', 'step': 21089, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:09.046891', 'step': 21089, 'epoch': 3} {'type': 'loss', 'content': 0.07134871929883957, 'timestamp': '2025-10-01 04:39:09.049500', 'step': 21090, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:09.080172', 'step': 21090, 'epoch': 3} {'type': 'loss', 'content': 0.060386016964912415, 'timestamp': '2025-10-01 04:39:09.082859', 'step': 21091, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:09.114433', 'step': 21091, 'epoch': 3} {'type': 'loss', 'content': 0.0769953727722168, 'timestamp': '2025-10-01 04:39:09.138783', 'step': 21092, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:09.174856', 'step': 21092, 'epoch': 3} {'type': 'loss', 'content': 0.018881890922784805, 'timestamp': '2025-10-01 04:39:09.177638', 'step': 21093, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:09.208110', 'step': 21093, 'epoch': 3} {'type': 'loss', 'content': 0.12035932391881943, 'timestamp': '2025-10-01 04:39:09.211579', 'step': 21094, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:09.242843', 'step': 21094, 'epoch': 3} {'type': 'loss', 'content': 0.023594222962856293, 'timestamp': '2025-10-01 04:39:09.244988', 'step': 21095, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:09.276468', 'step': 21095, 'epoch': 3} {'type': 'loss', 'content': 0.08748209476470947, 'timestamp': '2025-10-01 04:39:09.300156', 'step': 21096, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:09.330406', 'step': 21096, 'epoch': 3} {'type': 'loss', 'content': 0.08146850019693375, 'timestamp': '2025-10-01 04:39:09.332393', 'step': 21097, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:09.362684', 'step': 21097, 'epoch': 3} {'type': 'loss', 'content': 0.09514805674552917, 'timestamp': '2025-10-01 04:39:09.364905', 'step': 21098, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:09.396087', 'step': 21098, 'epoch': 3} {'type': 'loss', 'content': 0.023334190249443054, 'timestamp': '2025-10-01 04:39:09.398803', 'step': 21099, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:09.429466', 'step': 21099, 'epoch': 3} {'type': 'loss', 'content': 0.13014210760593414, 'timestamp': '2025-10-01 04:39:09.459473', 'step': 21100, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:09.489129', 'step': 21100, 'epoch': 3} {'type': 'loss', 'content': 0.021265871822834015, 'timestamp': '2025-10-01 04:39:09.491010', 'step': 21101, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:09.520739', 'step': 21101, 'epoch': 3} {'type': 'loss', 'content': 0.07285276800394058, 'timestamp': '2025-10-01 04:39:09.522895', 'step': 21102, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:09.554285', 'step': 21102, 'epoch': 3} {'type': 'loss', 'content': 0.12168814241886139, 'timestamp': '2025-10-01 04:39:09.556287', 'step': 21103, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:09.586577', 'step': 21103, 'epoch': 3} {'type': 'loss', 'content': 0.06090541556477547, 'timestamp': '2025-10-01 04:39:09.610162', 'step': 21104, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:09.651005', 'step': 21104, 'epoch': 3} {'type': 'loss', 'content': 0.06898167729377747, 'timestamp': '2025-10-01 04:39:09.653474', 'step': 21105, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:39:09.684321', 'step': 21105, 'epoch': 3} {'type': 'loss', 'content': 0.05259934067726135, 'timestamp': '2025-10-01 04:39:09.687224', 'step': 21106, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:09.718255', 'step': 21106, 'epoch': 3} {'type': 'loss', 'content': 0.05736081302165985, 'timestamp': '2025-10-01 04:39:09.720376', 'step': 21107, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:09.751021', 'step': 21107, 'epoch': 3} {'type': 'loss', 'content': 0.03583553805947304, 'timestamp': '2025-10-01 04:39:09.774510', 'step': 21108, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:09.804848', 'step': 21108, 'epoch': 3} {'type': 'loss', 'content': 0.03518626093864441, 'timestamp': '2025-10-01 04:39:09.807817', 'step': 21109, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:09.839013', 'step': 21109, 'epoch': 3} {'type': 'loss', 'content': 0.13954710960388184, 'timestamp': '2025-10-01 04:39:09.843873', 'step': 21110, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:09.876015', 'step': 21110, 'epoch': 3} {'type': 'loss', 'content': 0.05493103712797165, 'timestamp': '2025-10-01 04:39:09.878157', 'step': 21111, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:39:09.909717', 'step': 21111, 'epoch': 3} {'type': 'loss', 'content': 0.11020532250404358, 'timestamp': '2025-10-01 04:39:09.933076', 'step': 21112, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:39:09.962943', 'step': 21112, 'epoch': 3} {'type': 'loss', 'content': 0.09824260324239731, 'timestamp': '2025-10-01 04:39:09.964857', 'step': 21113, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:09.997097', 'step': 21113, 'epoch': 3} {'type': 'loss', 'content': 0.15297871828079224, 'timestamp': '2025-10-01 04:39:09.999778', 'step': 21114, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:39:10.031365', 'step': 21114, 'epoch': 3} {'type': 'loss', 'content': 0.06441148370504379, 'timestamp': '2025-10-01 04:39:10.033731', 'step': 21115, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:10.068958', 'step': 21115, 'epoch': 3} {'type': 'loss', 'content': 0.03794437274336815, 'timestamp': '2025-10-01 04:39:10.092325', 'step': 21116, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:10.123906', 'step': 21116, 'epoch': 3} {'type': 'loss', 'content': 0.07235175371170044, 'timestamp': '2025-10-01 04:39:10.125919', 'step': 21117, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:39:10.155968', 'step': 21117, 'epoch': 3} {'type': 'loss', 'content': 0.015468480065464973, 'timestamp': '2025-10-01 04:39:10.158077', 'step': 21118, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:10.188447', 'step': 21118, 'epoch': 3} {'type': 'loss', 'content': 0.05242861807346344, 'timestamp': '2025-10-01 04:39:10.190476', 'step': 21119, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:10.222476', 'step': 21119, 'epoch': 3} {'type': 'loss', 'content': 0.027505068108439445, 'timestamp': '2025-10-01 04:39:10.246001', 'step': 21120, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:10.276250', 'step': 21120, 'epoch': 3} {'type': 'loss', 'content': 0.1131468340754509, 'timestamp': '2025-10-01 04:39:10.278798', 'step': 21121, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:10.309627', 'step': 21121, 'epoch': 3} {'type': 'loss', 'content': 0.05990069359540939, 'timestamp': '2025-10-01 04:39:10.311394', 'step': 21122, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:10.342018', 'step': 21122, 'epoch': 3} {'type': 'loss', 'content': 0.051009196788072586, 'timestamp': '2025-10-01 04:39:10.344806', 'step': 21123, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:10.375671', 'step': 21123, 'epoch': 3} {'type': 'loss', 'content': 0.08934195339679718, 'timestamp': '2025-10-01 04:39:10.399857', 'step': 21124, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:10.431173', 'step': 21124, 'epoch': 3} {'type': 'loss', 'content': 0.041509564965963364, 'timestamp': '2025-10-01 04:39:10.433337', 'step': 21125, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:10.463881', 'step': 21125, 'epoch': 3} {'type': 'loss', 'content': 0.14149437844753265, 'timestamp': '2025-10-01 04:39:10.466058', 'step': 21126, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:10.495804', 'step': 21126, 'epoch': 3} {'type': 'loss', 'content': 0.06715838611125946, 'timestamp': '2025-10-01 04:39:10.497982', 'step': 21127, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:39:10.528877', 'step': 21127, 'epoch': 3} {'type': 'loss', 'content': 0.05599134787917137, 'timestamp': '2025-10-01 04:39:10.552511', 'step': 21128, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:10.591809', 'step': 21128, 'epoch': 3} {'type': 'loss', 'content': 0.0749092623591423, 'timestamp': '2025-10-01 04:39:10.593924', 'step': 21129, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:39:10.624869', 'step': 21129, 'epoch': 3} {'type': 'loss', 'content': 0.12198971211910248, 'timestamp': '2025-10-01 04:39:10.628020', 'step': 21130, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:10.670791', 'step': 21130, 'epoch': 3} {'type': 'loss', 'content': 0.10946565121412277, 'timestamp': '2025-10-01 04:39:10.672826', 'step': 21131, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:10.711521', 'step': 21131, 'epoch': 3} {'type': 'loss', 'content': 0.05709473043680191, 'timestamp': '2025-10-01 04:39:10.741257', 'step': 21132, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:10.771698', 'step': 21132, 'epoch': 3} {'type': 'loss', 'content': 0.10410889983177185, 'timestamp': '2025-10-01 04:39:10.774401', 'step': 21133, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:39:10.804499', 'step': 21133, 'epoch': 3} {'type': 'loss', 'content': 0.07157066464424133, 'timestamp': '2025-10-01 04:39:10.807300', 'step': 21134, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:10.846925', 'step': 21134, 'epoch': 3} {'type': 'loss', 'content': 0.024983221665024757, 'timestamp': '2025-10-01 04:39:10.848952', 'step': 21135, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:10.879068', 'step': 21135, 'epoch': 3} {'type': 'loss', 'content': 0.030303332954645157, 'timestamp': '2025-10-01 04:39:10.902905', 'step': 21136, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:10.933058', 'step': 21136, 'epoch': 3} {'type': 'loss', 'content': 0.0878734290599823, 'timestamp': '2025-10-01 04:39:10.935081', 'step': 21137, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:10.965105', 'step': 21137, 'epoch': 3} {'type': 'loss', 'content': 0.09316838532686234, 'timestamp': '2025-10-01 04:39:10.967255', 'step': 21138, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:39:11.000519', 'step': 21138, 'epoch': 3} {'type': 'loss', 'content': 0.10081778466701508, 'timestamp': '2025-10-01 04:39:11.003478', 'step': 21139, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:39:11.034005', 'step': 21139, 'epoch': 3} {'type': 'loss', 'content': 0.08446093648672104, 'timestamp': '2025-10-01 04:39:11.058007', 'step': 21140, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:11.090574', 'step': 21140, 'epoch': 3} {'type': 'loss', 'content': 0.04876427352428436, 'timestamp': '2025-10-01 04:39:11.092647', 'step': 21141, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:39:11.123365', 'step': 21141, 'epoch': 3} {'type': 'loss', 'content': 0.04836403205990791, 'timestamp': '2025-10-01 04:39:11.133307', 'step': 21142, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:11.163869', 'step': 21142, 'epoch': 3} {'type': 'loss', 'content': 0.02403843030333519, 'timestamp': '2025-10-01 04:39:11.166119', 'step': 21143, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:11.197217', 'step': 21143, 'epoch': 3} {'type': 'loss', 'content': 0.04779646173119545, 'timestamp': '2025-10-01 04:39:11.220751', 'step': 21144, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:11.251791', 'step': 21144, 'epoch': 3} {'type': 'loss', 'content': 0.012302987277507782, 'timestamp': '2025-10-01 04:39:11.253878', 'step': 21145, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:11.284313', 'step': 21145, 'epoch': 3} {'type': 'loss', 'content': 0.03576762229204178, 'timestamp': '2025-10-01 04:39:11.288630', 'step': 21146, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:11.319021', 'step': 21146, 'epoch': 3} {'type': 'loss', 'content': 0.1016615629196167, 'timestamp': '2025-10-01 04:39:11.321089', 'step': 21147, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:11.358126', 'step': 21147, 'epoch': 3} {'type': 'loss', 'content': 0.09625914692878723, 'timestamp': '2025-10-01 04:39:11.382005', 'step': 21148, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:11.414972', 'step': 21148, 'epoch': 3} {'type': 'loss', 'content': 0.0433960035443306, 'timestamp': '2025-10-01 04:39:11.417398', 'step': 21149, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:11.449315', 'step': 21149, 'epoch': 3} {'type': 'loss', 'content': 0.0690232664346695, 'timestamp': '2025-10-01 04:39:11.451887', 'step': 21150, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:11.482630', 'step': 21150, 'epoch': 3} {'type': 'loss', 'content': 0.036555998027324677, 'timestamp': '2025-10-01 04:39:11.489568', 'step': 21151, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:11.525115', 'step': 21151, 'epoch': 3} {'type': 'loss', 'content': 0.10302430391311646, 'timestamp': '2025-10-01 04:39:11.554962', 'step': 21152, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:11.586693', 'step': 21152, 'epoch': 3} {'type': 'loss', 'content': 0.04993915930390358, 'timestamp': '2025-10-01 04:39:11.595068', 'step': 21153, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:11.627763', 'step': 21153, 'epoch': 3} {'type': 'loss', 'content': 0.08865819126367569, 'timestamp': '2025-10-01 04:39:11.630102', 'step': 21154, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:39:11.662592', 'step': 21154, 'epoch': 3} {'type': 'loss', 'content': 0.038903795182704926, 'timestamp': '2025-10-01 04:39:11.664998', 'step': 21155, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:11.695985', 'step': 21155, 'epoch': 3} {'type': 'loss', 'content': 0.04546032100915909, 'timestamp': '2025-10-01 04:39:11.719564', 'step': 21156, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:11.750365', 'step': 21156, 'epoch': 3} {'type': 'loss', 'content': 0.11892406642436981, 'timestamp': '2025-10-01 04:39:11.752484', 'step': 21157, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:39:11.782513', 'step': 21157, 'epoch': 3} {'type': 'loss', 'content': 0.04851894453167915, 'timestamp': '2025-10-01 04:39:11.784846', 'step': 21158, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:11.816181', 'step': 21158, 'epoch': 3} {'type': 'loss', 'content': 0.049603819847106934, 'timestamp': '2025-10-01 04:39:11.818179', 'step': 21159, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:11.848179', 'step': 21159, 'epoch': 3} {'type': 'loss', 'content': 0.059380415827035904, 'timestamp': '2025-10-01 04:39:11.877861', 'step': 21160, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:39:11.909121', 'step': 21160, 'epoch': 3} {'type': 'loss', 'content': 0.05115131288766861, 'timestamp': '2025-10-01 04:39:11.911426', 'step': 21161, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:11.945513', 'step': 21161, 'epoch': 3} {'type': 'loss', 'content': 0.1302480846643448, 'timestamp': '2025-10-01 04:39:11.947623', 'step': 21162, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:11.978549', 'step': 21162, 'epoch': 3} {'type': 'loss', 'content': 0.05947601795196533, 'timestamp': '2025-10-01 04:39:11.980714', 'step': 21163, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:39:12.011510', 'step': 21163, 'epoch': 3} {'type': 'loss', 'content': 0.04302522540092468, 'timestamp': '2025-10-01 04:39:12.035420', 'step': 21164, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:12.067040', 'step': 21164, 'epoch': 3} {'type': 'loss', 'content': 0.03304952755570412, 'timestamp': '2025-10-01 04:39:12.069490', 'step': 21165, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:39:12.100154', 'step': 21165, 'epoch': 3} {'type': 'loss', 'content': 0.043809954077005386, 'timestamp': '2025-10-01 04:39:12.108640', 'step': 21166, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:39:12.141566', 'step': 21166, 'epoch': 3} {'type': 'loss', 'content': 0.06147289276123047, 'timestamp': '2025-10-01 04:39:12.148756', 'step': 21167, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:12.180071', 'step': 21167, 'epoch': 3} {'type': 'loss', 'content': 0.045176491141319275, 'timestamp': '2025-10-01 04:39:12.203837', 'step': 21168, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:12.233970', 'step': 21168, 'epoch': 3} {'type': 'loss', 'content': 0.06757759302854538, 'timestamp': '2025-10-01 04:39:12.236324', 'step': 21169, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:12.269263', 'step': 21169, 'epoch': 3} {'type': 'loss', 'content': 0.11005260795354843, 'timestamp': '2025-10-01 04:39:12.271573', 'step': 21170, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:12.304914', 'step': 21170, 'epoch': 3} {'type': 'loss', 'content': 0.04898015409708023, 'timestamp': '2025-10-01 04:39:12.307094', 'step': 21171, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:12.340324', 'step': 21171, 'epoch': 3} {'type': 'loss', 'content': 0.03837484493851662, 'timestamp': '2025-10-01 04:39:12.363911', 'step': 21172, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:12.394111', 'step': 21172, 'epoch': 3} {'type': 'loss', 'content': 0.02786865457892418, 'timestamp': '2025-10-01 04:39:12.396156', 'step': 21173, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:12.426241', 'step': 21173, 'epoch': 3} {'type': 'loss', 'content': 0.12403436005115509, 'timestamp': '2025-10-01 04:39:12.428387', 'step': 21174, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:12.459801', 'step': 21174, 'epoch': 3} {'type': 'loss', 'content': 0.028835559263825417, 'timestamp': '2025-10-01 04:39:12.464400', 'step': 21175, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:39:12.496581', 'step': 21175, 'epoch': 3} {'type': 'loss', 'content': 0.04388653114438057, 'timestamp': '2025-10-01 04:39:12.526954', 'step': 21176, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:12.557572', 'step': 21176, 'epoch': 3} {'type': 'loss', 'content': 0.07578562200069427, 'timestamp': '2025-10-01 04:39:12.559759', 'step': 21177, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:12.590877', 'step': 21177, 'epoch': 3} {'type': 'loss', 'content': 0.06592050194740295, 'timestamp': '2025-10-01 04:39:12.599402', 'step': 21178, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:39:12.635794', 'step': 21178, 'epoch': 3} {'type': 'loss', 'content': 0.1100899800658226, 'timestamp': '2025-10-01 04:39:12.640226', 'step': 21179, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:12.671546', 'step': 21179, 'epoch': 3} {'type': 'loss', 'content': 0.07700791209936142, 'timestamp': '2025-10-01 04:39:12.699973', 'step': 21180, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:12.731212', 'step': 21180, 'epoch': 3} {'type': 'loss', 'content': 0.03713735193014145, 'timestamp': '2025-10-01 04:39:12.733658', 'step': 21181, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:12.766414', 'step': 21181, 'epoch': 3} {'type': 'loss', 'content': 0.04545098915696144, 'timestamp': '2025-10-01 04:39:12.769466', 'step': 21182, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:12.800520', 'step': 21182, 'epoch': 3} {'type': 'loss', 'content': 0.03773592412471771, 'timestamp': '2025-10-01 04:39:12.804034', 'step': 21183, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:12.834077', 'step': 21183, 'epoch': 3} {'type': 'loss', 'content': 0.08418600261211395, 'timestamp': '2025-10-01 04:39:12.857843', 'step': 21184, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:12.891678', 'step': 21184, 'epoch': 3} {'type': 'loss', 'content': 0.05815054103732109, 'timestamp': '2025-10-01 04:39:12.894432', 'step': 21185, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:12.928441', 'step': 21185, 'epoch': 3} {'type': 'loss', 'content': 0.10992143303155899, 'timestamp': '2025-10-01 04:39:12.934461', 'step': 21186, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:12.969355', 'step': 21186, 'epoch': 3} {'type': 'loss', 'content': 0.08017425239086151, 'timestamp': '2025-10-01 04:39:12.971443', 'step': 21187, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:13.005389', 'step': 21187, 'epoch': 3} {'type': 'loss', 'content': 0.07142629474401474, 'timestamp': '2025-10-01 04:39:13.029303', 'step': 21188, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:13.066042', 'step': 21188, 'epoch': 3} {'type': 'loss', 'content': 0.10344019532203674, 'timestamp': '2025-10-01 04:39:13.068369', 'step': 21189, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:39:13.099312', 'step': 21189, 'epoch': 3} {'type': 'loss', 'content': 0.06194470822811127, 'timestamp': '2025-10-01 04:39:13.104442', 'step': 21190, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:39:13.139385', 'step': 21190, 'epoch': 3} {'type': 'loss', 'content': 0.051008958369493484, 'timestamp': '2025-10-01 04:39:13.141931', 'step': 21191, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:13.177006', 'step': 21191, 'epoch': 3} {'type': 'loss', 'content': 0.043981388211250305, 'timestamp': '2025-10-01 04:39:13.201233', 'step': 21192, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:13.238357', 'step': 21192, 'epoch': 3} {'type': 'loss', 'content': 0.03813568502664566, 'timestamp': '2025-10-01 04:39:13.242034', 'step': 21193, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:39:13.272722', 'step': 21193, 'epoch': 3} {'type': 'loss', 'content': 0.11065512895584106, 'timestamp': '2025-10-01 04:39:13.275201', 'step': 21194, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:13.312618', 'step': 21194, 'epoch': 3} {'type': 'loss', 'content': 0.04094194248318672, 'timestamp': '2025-10-01 04:39:13.321397', 'step': 21195, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:13.351922', 'step': 21195, 'epoch': 3} {'type': 'loss', 'content': 0.1542641520500183, 'timestamp': '2025-10-01 04:39:13.375630', 'step': 21196, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:13.405899', 'step': 21196, 'epoch': 3} {'type': 'loss', 'content': 0.03851604089140892, 'timestamp': '2025-10-01 04:39:13.408737', 'step': 21197, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:13.444011', 'step': 21197, 'epoch': 3} {'type': 'loss', 'content': 0.08999986201524734, 'timestamp': '2025-10-01 04:39:13.445976', 'step': 21198, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:13.480146', 'step': 21198, 'epoch': 3} {'type': 'loss', 'content': 0.08116518706083298, 'timestamp': '2025-10-01 04:39:13.482246', 'step': 21199, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:39:13.523442', 'step': 21199, 'epoch': 3} {'type': 'loss', 'content': 0.052737779915332794, 'timestamp': '2025-10-01 04:39:13.547386', 'step': 21200, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:13.581039', 'step': 21200, 'epoch': 3} {'type': 'loss', 'content': 0.07167460024356842, 'timestamp': '2025-10-01 04:39:13.588882', 'step': 21201, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:13.620357', 'step': 21201, 'epoch': 3} {'type': 'loss', 'content': 0.021085519343614578, 'timestamp': '2025-10-01 04:39:13.622161', 'step': 21202, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:13.652213', 'step': 21202, 'epoch': 3} {'type': 'loss', 'content': 0.08558888733386993, 'timestamp': '2025-10-01 04:39:13.654453', 'step': 21203, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:13.686782', 'step': 21203, 'epoch': 3} {'type': 'loss', 'content': 0.03331400826573372, 'timestamp': '2025-10-01 04:39:13.711725', 'step': 21204, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:13.743964', 'step': 21204, 'epoch': 3} {'type': 'loss', 'content': 0.089544877409935, 'timestamp': '2025-10-01 04:39:13.746216', 'step': 21205, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:13.776446', 'step': 21205, 'epoch': 3} {'type': 'loss', 'content': 0.043177127838134766, 'timestamp': '2025-10-01 04:39:13.781724', 'step': 21206, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:13.814580', 'step': 21206, 'epoch': 3} {'type': 'loss', 'content': 0.04827122762799263, 'timestamp': '2025-10-01 04:39:13.816732', 'step': 21207, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:39:13.847095', 'step': 21207, 'epoch': 3} {'type': 'loss', 'content': 0.04345507547259331, 'timestamp': '2025-10-01 04:39:13.870584', 'step': 21208, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:13.901998', 'step': 21208, 'epoch': 3} {'type': 'loss', 'content': 0.04939712584018707, 'timestamp': '2025-10-01 04:39:13.904961', 'step': 21209, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:13.937455', 'step': 21209, 'epoch': 3} {'type': 'loss', 'content': 0.04608912765979767, 'timestamp': '2025-10-01 04:39:13.948880', 'step': 21210, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:39:13.988517', 'step': 21210, 'epoch': 3} {'type': 'loss', 'content': 0.09419332444667816, 'timestamp': '2025-10-01 04:39:13.991020', 'step': 21211, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:14.021091', 'step': 21211, 'epoch': 3} {'type': 'loss', 'content': 0.14010216295719147, 'timestamp': '2025-10-01 04:39:14.044685', 'step': 21212, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:39:14.079680', 'step': 21212, 'epoch': 3} {'type': 'loss', 'content': 0.022579744458198547, 'timestamp': '2025-10-01 04:39:14.086359', 'step': 21213, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:14.119847', 'step': 21213, 'epoch': 3} {'type': 'loss', 'content': 0.0869721919298172, 'timestamp': '2025-10-01 04:39:14.126160', 'step': 21214, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:39:14.163352', 'step': 21214, 'epoch': 3} {'type': 'loss', 'content': 0.10142628103494644, 'timestamp': '2025-10-01 04:39:14.165854', 'step': 21215, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:14.198392', 'step': 21215, 'epoch': 3} {'type': 'loss', 'content': 0.051884446293115616, 'timestamp': '2025-10-01 04:39:14.222374', 'step': 21216, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:39:14.254911', 'step': 21216, 'epoch': 3} {'type': 'loss', 'content': 0.04138168692588806, 'timestamp': '2025-10-01 04:39:14.258459', 'step': 21217, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:14.291290', 'step': 21217, 'epoch': 3} {'type': 'loss', 'content': 0.07170974463224411, 'timestamp': '2025-10-01 04:39:14.293247', 'step': 21218, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:39:14.329962', 'step': 21218, 'epoch': 3} {'type': 'loss', 'content': 0.07840071618556976, 'timestamp': '2025-10-01 04:39:14.332908', 'step': 21219, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:14.370929', 'step': 21219, 'epoch': 3} {'type': 'loss', 'content': 0.07106289267539978, 'timestamp': '2025-10-01 04:39:14.396806', 'step': 21220, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:14.427941', 'step': 21220, 'epoch': 3} {'type': 'loss', 'content': 0.07005100697278976, 'timestamp': '2025-10-01 04:39:14.434559', 'step': 21221, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:14.466716', 'step': 21221, 'epoch': 3} {'type': 'loss', 'content': 0.09868630021810532, 'timestamp': '2025-10-01 04:39:14.470703', 'step': 21222, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:39:14.526872', 'step': 21222, 'epoch': 3} {'type': 'loss', 'content': 0.017659209668636322, 'timestamp': '2025-10-01 04:39:14.529878', 'step': 21223, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:14.570817', 'step': 21223, 'epoch': 3} {'type': 'loss', 'content': 0.02501147985458374, 'timestamp': '2025-10-01 04:39:14.598286', 'step': 21224, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:39:14.629805', 'step': 21224, 'epoch': 3} {'type': 'loss', 'content': 0.07387764751911163, 'timestamp': '2025-10-01 04:39:14.632170', 'step': 21225, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:14.664229', 'step': 21225, 'epoch': 3} {'type': 'loss', 'content': 0.05188961699604988, 'timestamp': '2025-10-01 04:39:14.666825', 'step': 21226, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:14.700077', 'step': 21226, 'epoch': 3} {'type': 'loss', 'content': 0.15757237374782562, 'timestamp': '2025-10-01 04:39:14.702468', 'step': 21227, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-10-01 04:39:14.753045', 'step': 21227, 'epoch': 3} {'type': 'loss', 'content': 0.012922121211886406, 'timestamp': '2025-10-01 04:39:14.778821', 'step': 21228, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:14.811981', 'step': 21228, 'epoch': 3} {'type': 'loss', 'content': 0.05583898723125458, 'timestamp': '2025-10-01 04:39:14.815048', 'step': 21229, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:14.846405', 'step': 21229, 'epoch': 3} {'type': 'loss', 'content': 0.0698482096195221, 'timestamp': '2025-10-01 04:39:14.848402', 'step': 21230, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:14.885998', 'step': 21230, 'epoch': 3} {'type': 'loss', 'content': 0.08254644274711609, 'timestamp': '2025-10-01 04:39:14.888206', 'step': 21231, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:14.919686', 'step': 21231, 'epoch': 3} {'type': 'loss', 'content': 0.009284128434956074, 'timestamp': '2025-10-01 04:39:14.943667', 'step': 21232, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:14.975807', 'step': 21232, 'epoch': 3} {'type': 'loss', 'content': 0.0817611813545227, 'timestamp': '2025-10-01 04:39:14.978661', 'step': 21233, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:15.009731', 'step': 21233, 'epoch': 3} {'type': 'loss', 'content': 0.02240118198096752, 'timestamp': '2025-10-01 04:39:15.012319', 'step': 21234, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:15.043007', 'step': 21234, 'epoch': 3} {'type': 'loss', 'content': 0.12362035363912582, 'timestamp': '2025-10-01 04:39:15.045213', 'step': 21235, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:15.091080', 'step': 21235, 'epoch': 3} {'type': 'loss', 'content': 0.10345069319009781, 'timestamp': '2025-10-01 04:39:15.118469', 'step': 21236, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:15.153404', 'step': 21236, 'epoch': 3} {'type': 'loss', 'content': 0.09830377995967865, 'timestamp': '2025-10-01 04:39:15.157446', 'step': 21237, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:15.189498', 'step': 21237, 'epoch': 3} {'type': 'loss', 'content': 0.10469364374876022, 'timestamp': '2025-10-01 04:39:15.191651', 'step': 21238, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:15.224148', 'step': 21238, 'epoch': 3} {'type': 'loss', 'content': 0.10366837680339813, 'timestamp': '2025-10-01 04:39:15.226868', 'step': 21239, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:39:15.259482', 'step': 21239, 'epoch': 3} {'type': 'loss', 'content': 0.03224373608827591, 'timestamp': '2025-10-01 04:39:15.283997', 'step': 21240, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:15.317080', 'step': 21240, 'epoch': 3} {'type': 'loss', 'content': 0.06634922325611115, 'timestamp': '2025-10-01 04:39:15.319080', 'step': 21241, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:15.350079', 'step': 21241, 'epoch': 3} {'type': 'loss', 'content': 0.16280654072761536, 'timestamp': '2025-10-01 04:39:15.352030', 'step': 21242, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:39:15.383272', 'step': 21242, 'epoch': 3} {'type': 'loss', 'content': 0.0560913048684597, 'timestamp': '2025-10-01 04:39:15.385795', 'step': 21243, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:15.424679', 'step': 21243, 'epoch': 3} {'type': 'loss', 'content': 0.051890481263399124, 'timestamp': '2025-10-01 04:39:15.448456', 'step': 21244, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:15.484427', 'step': 21244, 'epoch': 3} {'type': 'loss', 'content': 0.04016238823533058, 'timestamp': '2025-10-01 04:39:15.486567', 'step': 21245, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:39:15.519513', 'step': 21245, 'epoch': 3} {'type': 'loss', 'content': 0.04366004467010498, 'timestamp': '2025-10-01 04:39:15.531645', 'step': 21246, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:15.562795', 'step': 21246, 'epoch': 3} {'type': 'loss', 'content': 0.03949813172221184, 'timestamp': '2025-10-01 04:39:15.565330', 'step': 21247, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:15.599862', 'step': 21247, 'epoch': 3} {'type': 'loss', 'content': 0.0665498822927475, 'timestamp': '2025-10-01 04:39:15.632530', 'step': 21248, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:15.664849', 'step': 21248, 'epoch': 3} {'type': 'loss', 'content': 0.08437986671924591, 'timestamp': '2025-10-01 04:39:15.666964', 'step': 21249, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:15.697727', 'step': 21249, 'epoch': 3} {'type': 'loss', 'content': 0.07139039039611816, 'timestamp': '2025-10-01 04:39:15.699787', 'step': 21250, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:15.730779', 'step': 21250, 'epoch': 3} {'type': 'loss', 'content': 0.08144710212945938, 'timestamp': '2025-10-01 04:39:15.735528', 'step': 21251, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:15.766560', 'step': 21251, 'epoch': 3} {'type': 'loss', 'content': 0.05289921164512634, 'timestamp': '2025-10-01 04:39:15.790225', 'step': 21252, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:39:15.822009', 'step': 21252, 'epoch': 3} {'type': 'loss', 'content': 0.035615283995866776, 'timestamp': '2025-10-01 04:39:15.824313', 'step': 21253, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:15.855420', 'step': 21253, 'epoch': 3} {'type': 'loss', 'content': 0.0571892149746418, 'timestamp': '2025-10-01 04:39:15.858197', 'step': 21254, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:15.901404', 'step': 21254, 'epoch': 3} {'type': 'loss', 'content': 0.07122279703617096, 'timestamp': '2025-10-01 04:39:15.903685', 'step': 21255, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:15.936411', 'step': 21255, 'epoch': 3} {'type': 'loss', 'content': 0.10120567679405212, 'timestamp': '2025-10-01 04:39:15.960003', 'step': 21256, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:39:15.991838', 'step': 21256, 'epoch': 3} {'type': 'loss', 'content': 0.06601953506469727, 'timestamp': '2025-10-01 04:39:15.999268', 'step': 21257, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:39:16.033746', 'step': 21257, 'epoch': 3} {'type': 'loss', 'content': 0.07850782573223114, 'timestamp': '2025-10-01 04:39:16.036402', 'step': 21258, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:39:16.067400', 'step': 21258, 'epoch': 3} {'type': 'loss', 'content': 0.0880371704697609, 'timestamp': '2025-10-01 04:39:16.070066', 'step': 21259, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:16.100668', 'step': 21259, 'epoch': 3} {'type': 'loss', 'content': 0.08613235503435135, 'timestamp': '2025-10-01 04:39:16.130385', 'step': 21260, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:16.161233', 'step': 21260, 'epoch': 3} {'type': 'loss', 'content': 0.06323222815990448, 'timestamp': '2025-10-01 04:39:16.163482', 'step': 21261, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:16.194264', 'step': 21261, 'epoch': 3} {'type': 'loss', 'content': 0.09290697425603867, 'timestamp': '2025-10-01 04:39:16.196374', 'step': 21262, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:16.227131', 'step': 21262, 'epoch': 3} {'type': 'loss', 'content': 0.13497351109981537, 'timestamp': '2025-10-01 04:39:16.229341', 'step': 21263, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:16.260808', 'step': 21263, 'epoch': 3} {'type': 'loss', 'content': 0.04621896520256996, 'timestamp': '2025-10-01 04:39:16.284662', 'step': 21264, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:16.314624', 'step': 21264, 'epoch': 3} {'type': 'loss', 'content': 0.13588473200798035, 'timestamp': '2025-10-01 04:39:16.316967', 'step': 21265, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:16.348077', 'step': 21265, 'epoch': 3} {'type': 'loss', 'content': 0.043129999190568924, 'timestamp': '2025-10-01 04:39:16.362992', 'step': 21266, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:16.394120', 'step': 21266, 'epoch': 3} {'type': 'loss', 'content': 0.08474668115377426, 'timestamp': '2025-10-01 04:39:16.396209', 'step': 21267, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:16.428709', 'step': 21267, 'epoch': 3} {'type': 'loss', 'content': 0.05772143229842186, 'timestamp': '2025-10-01 04:39:16.453222', 'step': 21268, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:16.486529', 'step': 21268, 'epoch': 3} {'type': 'loss', 'content': 0.04422588646411896, 'timestamp': '2025-10-01 04:39:16.488857', 'step': 21269, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:16.522654', 'step': 21269, 'epoch': 3} {'type': 'loss', 'content': 0.10246280580759048, 'timestamp': '2025-10-01 04:39:16.528167', 'step': 21270, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:16.571863', 'step': 21270, 'epoch': 3} {'type': 'loss', 'content': 0.025643832981586456, 'timestamp': '2025-10-01 04:39:16.573929', 'step': 21271, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:39:16.604795', 'step': 21271, 'epoch': 3} {'type': 'loss', 'content': 0.08135898411273956, 'timestamp': '2025-10-01 04:39:16.628984', 'step': 21272, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:16.661291', 'step': 21272, 'epoch': 3} {'type': 'loss', 'content': 0.09646497666835785, 'timestamp': '2025-10-01 04:39:16.674344', 'step': 21273, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:16.707201', 'step': 21273, 'epoch': 3} {'type': 'loss', 'content': 0.07831442356109619, 'timestamp': '2025-10-01 04:39:16.709453', 'step': 21274, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:16.739660', 'step': 21274, 'epoch': 3} {'type': 'loss', 'content': 0.14476421475410461, 'timestamp': '2025-10-01 04:39:16.741870', 'step': 21275, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:16.774167', 'step': 21275, 'epoch': 3} {'type': 'loss', 'content': 0.08419663459062576, 'timestamp': '2025-10-01 04:39:16.797976', 'step': 21276, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:39:16.829275', 'step': 21276, 'epoch': 3} {'type': 'loss', 'content': 0.06670503318309784, 'timestamp': '2025-10-01 04:39:16.831463', 'step': 21277, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:16.861752', 'step': 21277, 'epoch': 3} {'type': 'loss', 'content': 0.10466892272233963, 'timestamp': '2025-10-01 04:39:16.865703', 'step': 21278, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:39:16.896936', 'step': 21278, 'epoch': 3} {'type': 'loss', 'content': 0.08486122637987137, 'timestamp': '2025-10-01 04:39:16.899568', 'step': 21279, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:16.930458', 'step': 21279, 'epoch': 3} {'type': 'loss', 'content': 0.12917675077915192, 'timestamp': '2025-10-01 04:39:16.954255', 'step': 21280, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:16.996819', 'step': 21280, 'epoch': 3} {'type': 'loss', 'content': 0.0368795320391655, 'timestamp': '2025-10-01 04:39:16.999633', 'step': 21281, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:17.030821', 'step': 21281, 'epoch': 3} {'type': 'loss', 'content': 0.07474568486213684, 'timestamp': '2025-10-01 04:39:17.033197', 'step': 21282, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:17.064120', 'step': 21282, 'epoch': 3} {'type': 'loss', 'content': 0.05487522855401039, 'timestamp': '2025-10-01 04:39:17.066782', 'step': 21283, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:17.097356', 'step': 21283, 'epoch': 3} {'type': 'loss', 'content': 0.10769747942686081, 'timestamp': '2025-10-01 04:39:17.121644', 'step': 21284, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:17.153087', 'step': 21284, 'epoch': 3} {'type': 'loss', 'content': 0.05841480940580368, 'timestamp': '2025-10-01 04:39:17.159336', 'step': 21285, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:17.190287', 'step': 21285, 'epoch': 3} {'type': 'loss', 'content': 0.03281288594007492, 'timestamp': '2025-10-01 04:39:17.193042', 'step': 21286, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:17.224321', 'step': 21286, 'epoch': 3} {'type': 'loss', 'content': 0.08768787235021591, 'timestamp': '2025-10-01 04:39:17.226360', 'step': 21287, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:39:17.257794', 'step': 21287, 'epoch': 3} {'type': 'loss', 'content': 0.05331815034151077, 'timestamp': '2025-10-01 04:39:17.282791', 'step': 21288, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:39:17.313175', 'step': 21288, 'epoch': 3} {'type': 'loss', 'content': 0.04678918793797493, 'timestamp': '2025-10-01 04:39:17.315177', 'step': 21289, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:17.345441', 'step': 21289, 'epoch': 3} {'type': 'loss', 'content': 0.1354007124900818, 'timestamp': '2025-10-01 04:39:17.347495', 'step': 21290, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:17.378359', 'step': 21290, 'epoch': 3} {'type': 'loss', 'content': 0.07004371285438538, 'timestamp': '2025-10-01 04:39:17.384662', 'step': 21291, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:17.415055', 'step': 21291, 'epoch': 3} {'type': 'loss', 'content': 0.06993798911571503, 'timestamp': '2025-10-01 04:39:17.438592', 'step': 21292, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:17.474945', 'step': 21292, 'epoch': 3} {'type': 'loss', 'content': 0.06517980247735977, 'timestamp': '2025-10-01 04:39:17.476869', 'step': 21293, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:17.507543', 'step': 21293, 'epoch': 3} {'type': 'loss', 'content': 0.03825089707970619, 'timestamp': '2025-10-01 04:39:17.515096', 'step': 21294, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:17.546804', 'step': 21294, 'epoch': 3} {'type': 'loss', 'content': 0.032240353524684906, 'timestamp': '2025-10-01 04:39:17.553456', 'step': 21295, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:17.584298', 'step': 21295, 'epoch': 3} {'type': 'loss', 'content': 0.12382006645202637, 'timestamp': '2025-10-01 04:39:17.608055', 'step': 21296, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:17.638802', 'step': 21296, 'epoch': 3} {'type': 'loss', 'content': 0.057170141488313675, 'timestamp': '2025-10-01 04:39:17.641365', 'step': 21297, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:17.672026', 'step': 21297, 'epoch': 3} {'type': 'loss', 'content': 0.0751836746931076, 'timestamp': '2025-10-01 04:39:17.674562', 'step': 21298, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:39:17.706882', 'step': 21298, 'epoch': 3} {'type': 'loss', 'content': 0.11068353801965714, 'timestamp': '2025-10-01 04:39:17.708964', 'step': 21299, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:17.742186', 'step': 21299, 'epoch': 3} {'type': 'loss', 'content': 0.04886161535978317, 'timestamp': '2025-10-01 04:39:17.765899', 'step': 21300, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:17.800374', 'step': 21300, 'epoch': 3} {'type': 'loss', 'content': 0.05122606083750725, 'timestamp': '2025-10-01 04:39:17.805344', 'step': 21301, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:39:17.836396', 'step': 21301, 'epoch': 3} {'type': 'loss', 'content': 0.04636911302804947, 'timestamp': '2025-10-01 04:39:17.839127', 'step': 21302, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:17.882948', 'step': 21302, 'epoch': 3} {'type': 'loss', 'content': 0.046591952443122864, 'timestamp': '2025-10-01 04:39:17.885144', 'step': 21303, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:17.915553', 'step': 21303, 'epoch': 3} {'type': 'loss', 'content': 0.05297451466321945, 'timestamp': '2025-10-01 04:39:17.939063', 'step': 21304, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:17.970140', 'step': 21304, 'epoch': 3} {'type': 'loss', 'content': 0.041184742003679276, 'timestamp': '2025-10-01 04:39:17.972149', 'step': 21305, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:18.003506', 'step': 21305, 'epoch': 3} {'type': 'loss', 'content': 0.1445283591747284, 'timestamp': '2025-10-01 04:39:18.044200', 'step': 21306, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:39:18.074559', 'step': 21306, 'epoch': 3} {'type': 'loss', 'content': 0.15546318888664246, 'timestamp': '2025-10-01 04:39:18.077111', 'step': 21307, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:18.116607', 'step': 21307, 'epoch': 3} {'type': 'loss', 'content': 0.140742689371109, 'timestamp': '2025-10-01 04:39:18.141630', 'step': 21308, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:18.176599', 'step': 21308, 'epoch': 3} {'type': 'loss', 'content': 0.0678848922252655, 'timestamp': '2025-10-01 04:39:18.178865', 'step': 21309, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:39:18.209568', 'step': 21309, 'epoch': 3} {'type': 'loss', 'content': 0.05861477926373482, 'timestamp': '2025-10-01 04:39:18.211930', 'step': 21310, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:18.242284', 'step': 21310, 'epoch': 3} {'type': 'loss', 'content': 0.08285322040319443, 'timestamp': '2025-10-01 04:39:18.244469', 'step': 21311, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:18.277058', 'step': 21311, 'epoch': 3} {'type': 'loss', 'content': 0.09274256229400635, 'timestamp': '2025-10-01 04:39:18.301411', 'step': 21312, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:18.331807', 'step': 21312, 'epoch': 3} {'type': 'loss', 'content': 0.07566613703966141, 'timestamp': '2025-10-01 04:39:18.342627', 'step': 21313, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:18.372786', 'step': 21313, 'epoch': 3} {'type': 'loss', 'content': 0.12111467868089676, 'timestamp': '2025-10-01 04:39:18.375182', 'step': 21314, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:18.412204', 'step': 21314, 'epoch': 3} {'type': 'loss', 'content': 0.030810849741101265, 'timestamp': '2025-10-01 04:39:18.414243', 'step': 21315, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:39:18.445011', 'step': 21315, 'epoch': 3} {'type': 'loss', 'content': 0.10415296256542206, 'timestamp': '2025-10-01 04:39:18.468648', 'step': 21316, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:18.498989', 'step': 21316, 'epoch': 3} {'type': 'loss', 'content': 0.05214880779385567, 'timestamp': '2025-10-01 04:39:18.501010', 'step': 21317, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:18.531664', 'step': 21317, 'epoch': 3} {'type': 'loss', 'content': 0.09751057624816895, 'timestamp': '2025-10-01 04:39:18.533683', 'step': 21318, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:18.563764', 'step': 21318, 'epoch': 3} {'type': 'loss', 'content': 0.12292514741420746, 'timestamp': '2025-10-01 04:39:18.565950', 'step': 21319, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:18.596202', 'step': 21319, 'epoch': 3} {'type': 'loss', 'content': 0.09661004692316055, 'timestamp': '2025-10-01 04:39:18.619812', 'step': 21320, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:18.655552', 'step': 21320, 'epoch': 3} {'type': 'loss', 'content': 0.0738808885216713, 'timestamp': '2025-10-01 04:39:18.657735', 'step': 21321, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:18.689190', 'step': 21321, 'epoch': 3} {'type': 'loss', 'content': 0.13309821486473083, 'timestamp': '2025-10-01 04:39:18.691613', 'step': 21322, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:18.728787', 'step': 21322, 'epoch': 3} {'type': 'loss', 'content': 0.06969013810157776, 'timestamp': '2025-10-01 04:39:18.730878', 'step': 21323, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:18.762576', 'step': 21323, 'epoch': 3} {'type': 'loss', 'content': 0.08097390085458755, 'timestamp': '2025-10-01 04:39:18.786317', 'step': 21324, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:18.816670', 'step': 21324, 'epoch': 3} {'type': 'loss', 'content': 0.10157378762960434, 'timestamp': '2025-10-01 04:39:18.818816', 'step': 21325, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:18.848850', 'step': 21325, 'epoch': 3} {'type': 'loss', 'content': 0.044490423053503036, 'timestamp': '2025-10-01 04:39:18.851189', 'step': 21326, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:39:18.882989', 'step': 21326, 'epoch': 3} {'type': 'loss', 'content': 0.09205710142850876, 'timestamp': '2025-10-01 04:39:18.885203', 'step': 21327, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:18.915067', 'step': 21327, 'epoch': 3} {'type': 'loss', 'content': 0.030513154342770576, 'timestamp': '2025-10-01 04:39:18.946150', 'step': 21328, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:18.977087', 'step': 21328, 'epoch': 3} {'type': 'loss', 'content': 0.0976574495434761, 'timestamp': '2025-10-01 04:39:18.979158', 'step': 21329, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:19.009361', 'step': 21329, 'epoch': 3} {'type': 'loss', 'content': 0.06939125061035156, 'timestamp': '2025-10-01 04:39:19.011419', 'step': 21330, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:19.042135', 'step': 21330, 'epoch': 3} {'type': 'loss', 'content': 0.03239981085062027, 'timestamp': '2025-10-01 04:39:19.044149', 'step': 21331, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:19.075363', 'step': 21331, 'epoch': 3} {'type': 'loss', 'content': 0.043543409556150436, 'timestamp': '2025-10-01 04:39:19.099049', 'step': 21332, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:39:19.130130', 'step': 21332, 'epoch': 3} {'type': 'loss', 'content': 0.04482332989573479, 'timestamp': '2025-10-01 04:39:19.132277', 'step': 21333, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:39:19.163303', 'step': 21333, 'epoch': 3} {'type': 'loss', 'content': 0.12948638200759888, 'timestamp': '2025-10-01 04:39:19.165744', 'step': 21334, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:19.195741', 'step': 21334, 'epoch': 3} {'type': 'loss', 'content': 0.07584753632545471, 'timestamp': '2025-10-01 04:39:19.197696', 'step': 21335, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:19.229001', 'step': 21335, 'epoch': 3} {'type': 'loss', 'content': 0.057537391781806946, 'timestamp': '2025-10-01 04:39:19.252917', 'step': 21336, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:19.283308', 'step': 21336, 'epoch': 3} {'type': 'loss', 'content': 0.03995740786194801, 'timestamp': '2025-10-01 04:39:19.285770', 'step': 21337, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:19.323077', 'step': 21337, 'epoch': 3} {'type': 'loss', 'content': 0.03807120770215988, 'timestamp': '2025-10-01 04:39:19.325196', 'step': 21338, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:19.355238', 'step': 21338, 'epoch': 3} {'type': 'loss', 'content': 0.10280555486679077, 'timestamp': '2025-10-01 04:39:19.357263', 'step': 21339, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:19.387389', 'step': 21339, 'epoch': 3} {'type': 'loss', 'content': 0.05324694886803627, 'timestamp': '2025-10-01 04:39:19.411506', 'step': 21340, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:19.442354', 'step': 21340, 'epoch': 3} {'type': 'loss', 'content': 0.03650720417499542, 'timestamp': '2025-10-01 04:39:19.444599', 'step': 21341, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:19.475640', 'step': 21341, 'epoch': 3} {'type': 'loss', 'content': 0.1216869056224823, 'timestamp': '2025-10-01 04:39:19.477920', 'step': 21342, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:19.509206', 'step': 21342, 'epoch': 3} {'type': 'loss', 'content': 0.06305788457393646, 'timestamp': '2025-10-01 04:39:19.511695', 'step': 21343, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:39:19.544642', 'step': 21343, 'epoch': 3} {'type': 'loss', 'content': 0.08578699827194214, 'timestamp': '2025-10-01 04:39:19.571629', 'step': 21344, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:19.601665', 'step': 21344, 'epoch': 3} {'type': 'loss', 'content': 0.034108102321624756, 'timestamp': '2025-10-01 04:39:19.603781', 'step': 21345, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:19.635524', 'step': 21345, 'epoch': 3} {'type': 'loss', 'content': 0.04054862633347511, 'timestamp': '2025-10-01 04:39:19.637492', 'step': 21346, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:19.673080', 'step': 21346, 'epoch': 3} {'type': 'loss', 'content': 0.06414645165205002, 'timestamp': '2025-10-01 04:39:19.676057', 'step': 21347, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:39:19.708067', 'step': 21347, 'epoch': 3} {'type': 'loss', 'content': 0.05286650359630585, 'timestamp': '2025-10-01 04:39:19.731853', 'step': 21348, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:19.768418', 'step': 21348, 'epoch': 3} {'type': 'loss', 'content': 0.07176965475082397, 'timestamp': '2025-10-01 04:39:19.770530', 'step': 21349, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:39:19.801739', 'step': 21349, 'epoch': 3} {'type': 'loss', 'content': 0.1258900761604309, 'timestamp': '2025-10-01 04:39:19.804627', 'step': 21350, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:19.836364', 'step': 21350, 'epoch': 3} {'type': 'loss', 'content': 0.06436089426279068, 'timestamp': '2025-10-01 04:39:19.838553', 'step': 21351, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:19.868917', 'step': 21351, 'epoch': 3} {'type': 'loss', 'content': 0.03905847668647766, 'timestamp': '2025-10-01 04:39:19.892461', 'step': 21352, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:19.924156', 'step': 21352, 'epoch': 3} {'type': 'loss', 'content': 0.0454968586564064, 'timestamp': '2025-10-01 04:39:19.926106', 'step': 21353, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:19.956155', 'step': 21353, 'epoch': 3} {'type': 'loss', 'content': 0.02930510975420475, 'timestamp': '2025-10-01 04:39:19.958188', 'step': 21354, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:19.988294', 'step': 21354, 'epoch': 3} {'type': 'loss', 'content': 0.07345712929964066, 'timestamp': '2025-10-01 04:39:19.990851', 'step': 21355, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:20.022009', 'step': 21355, 'epoch': 3} {'type': 'loss', 'content': 0.08813818544149399, 'timestamp': '2025-10-01 04:39:20.045728', 'step': 21356, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:20.076062', 'step': 21356, 'epoch': 3} {'type': 'loss', 'content': 0.05949237197637558, 'timestamp': '2025-10-01 04:39:20.078266', 'step': 21357, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:20.108717', 'step': 21357, 'epoch': 3} {'type': 'loss', 'content': 0.019677313044667244, 'timestamp': '2025-10-01 04:39:20.111643', 'step': 21358, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:20.141952', 'step': 21358, 'epoch': 3} {'type': 'loss', 'content': 0.0725487694144249, 'timestamp': '2025-10-01 04:39:20.144221', 'step': 21359, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:20.174800', 'step': 21359, 'epoch': 3} {'type': 'loss', 'content': 0.030426260083913803, 'timestamp': '2025-10-01 04:39:20.199107', 'step': 21360, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:20.237904', 'step': 21360, 'epoch': 3} {'type': 'loss', 'content': 0.07068092375993729, 'timestamp': '2025-10-01 04:39:20.240049', 'step': 21361, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:20.271141', 'step': 21361, 'epoch': 3} {'type': 'loss', 'content': 0.09133828431367874, 'timestamp': '2025-10-01 04:39:20.273567', 'step': 21362, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:20.304458', 'step': 21362, 'epoch': 3} {'type': 'loss', 'content': 0.042005278170108795, 'timestamp': '2025-10-01 04:39:20.306344', 'step': 21363, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:20.337459', 'step': 21363, 'epoch': 3} {'type': 'loss', 'content': 0.11665239930152893, 'timestamp': '2025-10-01 04:39:20.360893', 'step': 21364, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:20.395104', 'step': 21364, 'epoch': 3} {'type': 'loss', 'content': 0.08289311826229095, 'timestamp': '2025-10-01 04:39:20.396976', 'step': 21365, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:20.427647', 'step': 21365, 'epoch': 3} {'type': 'loss', 'content': 0.0909249484539032, 'timestamp': '2025-10-01 04:39:20.429757', 'step': 21366, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:20.459856', 'step': 21366, 'epoch': 3} {'type': 'loss', 'content': 0.1604354977607727, 'timestamp': '2025-10-01 04:39:20.462075', 'step': 21367, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:20.492993', 'step': 21367, 'epoch': 3} {'type': 'loss', 'content': 0.08128821104764938, 'timestamp': '2025-10-01 04:39:20.516979', 'step': 21368, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:20.547176', 'step': 21368, 'epoch': 3} {'type': 'loss', 'content': 0.03860624507069588, 'timestamp': '2025-10-01 04:39:20.549891', 'step': 21369, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:39:20.580217', 'step': 21369, 'epoch': 3} {'type': 'loss', 'content': 0.026687905192375183, 'timestamp': '2025-10-01 04:39:20.583740', 'step': 21370, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:20.615811', 'step': 21370, 'epoch': 3} {'type': 'loss', 'content': 0.02919573150575161, 'timestamp': '2025-10-01 04:39:20.617729', 'step': 21371, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:20.649036', 'step': 21371, 'epoch': 3} {'type': 'loss', 'content': 0.03523091599345207, 'timestamp': '2025-10-01 04:39:20.672325', 'step': 21372, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:20.702781', 'step': 21372, 'epoch': 3} {'type': 'loss', 'content': 0.07126213610172272, 'timestamp': '2025-10-01 04:39:20.704757', 'step': 21373, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:20.734582', 'step': 21373, 'epoch': 3} {'type': 'loss', 'content': 0.09477094560861588, 'timestamp': '2025-10-01 04:39:20.736800', 'step': 21374, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:39:20.767521', 'step': 21374, 'epoch': 3} {'type': 'loss', 'content': 0.085113525390625, 'timestamp': '2025-10-01 04:39:20.770290', 'step': 21375, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:39:20.800422', 'step': 21375, 'epoch': 3} {'type': 'loss', 'content': 0.07663989812135696, 'timestamp': '2025-10-01 04:39:20.824031', 'step': 21376, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:20.853921', 'step': 21376, 'epoch': 3} {'type': 'loss', 'content': 0.08846898376941681, 'timestamp': '2025-10-01 04:39:20.856065', 'step': 21377, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:20.886441', 'step': 21377, 'epoch': 3} {'type': 'loss', 'content': 0.043125394731760025, 'timestamp': '2025-10-01 04:39:20.888540', 'step': 21378, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:20.918315', 'step': 21378, 'epoch': 3} {'type': 'loss', 'content': 0.11003727465867996, 'timestamp': '2025-10-01 04:39:20.920544', 'step': 21379, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:20.950444', 'step': 21379, 'epoch': 3} {'type': 'loss', 'content': 0.043367862701416016, 'timestamp': '2025-10-01 04:39:20.974156', 'step': 21380, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:21.007129', 'step': 21380, 'epoch': 3} {'type': 'loss', 'content': 0.06652883440256119, 'timestamp': '2025-10-01 04:39:21.009070', 'step': 21381, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:39:21.039043', 'step': 21381, 'epoch': 3} {'type': 'loss', 'content': 0.04318593442440033, 'timestamp': '2025-10-01 04:39:21.041480', 'step': 21382, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:39:21.075545', 'step': 21382, 'epoch': 3} {'type': 'loss', 'content': 0.08223528414964676, 'timestamp': '2025-10-01 04:39:21.078107', 'step': 21383, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:21.108956', 'step': 21383, 'epoch': 3} {'type': 'loss', 'content': 0.07175809144973755, 'timestamp': '2025-10-01 04:39:21.137077', 'step': 21384, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:21.167967', 'step': 21384, 'epoch': 3} {'type': 'loss', 'content': 0.10343881696462631, 'timestamp': '2025-10-01 04:39:21.170706', 'step': 21385, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:21.201786', 'step': 21385, 'epoch': 3} {'type': 'loss', 'content': 0.012951280921697617, 'timestamp': '2025-10-01 04:39:21.206747', 'step': 21386, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:21.239280', 'step': 21386, 'epoch': 3} {'type': 'loss', 'content': 0.04572982341051102, 'timestamp': '2025-10-01 04:39:21.241499', 'step': 21387, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:21.271770', 'step': 21387, 'epoch': 3} {'type': 'loss', 'content': 0.049707479774951935, 'timestamp': '2025-10-01 04:39:21.299677', 'step': 21388, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:21.331055', 'step': 21388, 'epoch': 3} {'type': 'loss', 'content': 0.05378556251525879, 'timestamp': '2025-10-01 04:39:21.333065', 'step': 21389, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:21.364999', 'step': 21389, 'epoch': 3} {'type': 'loss', 'content': 0.017847899347543716, 'timestamp': '2025-10-01 04:39:21.369058', 'step': 21390, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:21.400977', 'step': 21390, 'epoch': 3} {'type': 'loss', 'content': 0.08568038791418076, 'timestamp': '2025-10-01 04:39:21.403224', 'step': 21391, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:21.433826', 'step': 21391, 'epoch': 3} {'type': 'loss', 'content': 0.0423198826611042, 'timestamp': '2025-10-01 04:39:21.465071', 'step': 21392, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:21.495312', 'step': 21392, 'epoch': 3} {'type': 'loss', 'content': 0.06007234752178192, 'timestamp': '2025-10-01 04:39:21.497533', 'step': 21393, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:21.530367', 'step': 21393, 'epoch': 3} {'type': 'loss', 'content': 0.05982092767953873, 'timestamp': '2025-10-01 04:39:21.532361', 'step': 21394, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:39:21.565419', 'step': 21394, 'epoch': 3} {'type': 'loss', 'content': 0.04644652456045151, 'timestamp': '2025-10-01 04:39:21.567918', 'step': 21395, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:21.598572', 'step': 21395, 'epoch': 3} {'type': 'loss', 'content': 0.027211392298340797, 'timestamp': '2025-10-01 04:39:21.623492', 'step': 21396, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:21.653784', 'step': 21396, 'epoch': 3} {'type': 'loss', 'content': 0.07432510703802109, 'timestamp': '2025-10-01 04:39:21.655854', 'step': 21397, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:21.687448', 'step': 21397, 'epoch': 3} {'type': 'loss', 'content': 0.0713718980550766, 'timestamp': '2025-10-01 04:39:21.689625', 'step': 21398, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:21.719524', 'step': 21398, 'epoch': 3} {'type': 'loss', 'content': 0.054782163351774216, 'timestamp': '2025-10-01 04:39:21.721867', 'step': 21399, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:21.753284', 'step': 21399, 'epoch': 3} {'type': 'loss', 'content': 0.11758354306221008, 'timestamp': '2025-10-01 04:39:21.777715', 'step': 21400, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:21.808353', 'step': 21400, 'epoch': 3} {'type': 'loss', 'content': 0.08529269695281982, 'timestamp': '2025-10-01 04:39:21.810396', 'step': 21401, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:21.840472', 'step': 21401, 'epoch': 3} {'type': 'loss', 'content': 0.06622646749019623, 'timestamp': '2025-10-01 04:39:21.842540', 'step': 21402, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:21.872839', 'step': 21402, 'epoch': 3} {'type': 'loss', 'content': 0.07944424450397491, 'timestamp': '2025-10-01 04:39:21.875105', 'step': 21403, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:21.905450', 'step': 21403, 'epoch': 3} {'type': 'loss', 'content': 0.07910286635160446, 'timestamp': '2025-10-01 04:39:21.929128', 'step': 21404, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:21.959287', 'step': 21404, 'epoch': 3} {'type': 'loss', 'content': 0.05303243547677994, 'timestamp': '2025-10-01 04:39:21.961391', 'step': 21405, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:39:21.991457', 'step': 21405, 'epoch': 3} {'type': 'loss', 'content': 0.0856509655714035, 'timestamp': '2025-10-01 04:39:21.994318', 'step': 21406, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:22.024234', 'step': 21406, 'epoch': 3} {'type': 'loss', 'content': 0.056060660630464554, 'timestamp': '2025-10-01 04:39:22.027761', 'step': 21407, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:39:22.060267', 'step': 21407, 'epoch': 3} {'type': 'loss', 'content': 0.06759386509656906, 'timestamp': '2025-10-01 04:39:22.083898', 'step': 21408, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:22.114216', 'step': 21408, 'epoch': 3} {'type': 'loss', 'content': 0.06934239715337753, 'timestamp': '2025-10-01 04:39:22.116402', 'step': 21409, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:22.146405', 'step': 21409, 'epoch': 3} {'type': 'loss', 'content': 0.06384483724832535, 'timestamp': '2025-10-01 04:39:22.148496', 'step': 21410, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:22.178753', 'step': 21410, 'epoch': 3} {'type': 'loss', 'content': 0.06257867813110352, 'timestamp': '2025-10-01 04:39:22.180888', 'step': 21411, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:39:22.212110', 'step': 21411, 'epoch': 3} {'type': 'loss', 'content': 0.09653662890195847, 'timestamp': '2025-10-01 04:39:22.235771', 'step': 21412, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:22.267199', 'step': 21412, 'epoch': 3} {'type': 'loss', 'content': 0.1401299387216568, 'timestamp': '2025-10-01 04:39:22.269363', 'step': 21413, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:22.301067', 'step': 21413, 'epoch': 3} {'type': 'loss', 'content': 0.03314175084233284, 'timestamp': '2025-10-01 04:39:22.303265', 'step': 21414, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:22.333886', 'step': 21414, 'epoch': 3} {'type': 'loss', 'content': 0.046371039003133774, 'timestamp': '2025-10-01 04:39:22.336607', 'step': 21415, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:22.371881', 'step': 21415, 'epoch': 3} {'type': 'loss', 'content': 0.0593469999730587, 'timestamp': '2025-10-01 04:39:22.397389', 'step': 21416, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:22.430287', 'step': 21416, 'epoch': 3} {'type': 'loss', 'content': 0.023707490414381027, 'timestamp': '2025-10-01 04:39:22.441003', 'step': 21417, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:22.477856', 'step': 21417, 'epoch': 3} {'type': 'loss', 'content': 0.07275772094726562, 'timestamp': '2025-10-01 04:39:22.480005', 'step': 21418, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:22.517168', 'step': 21418, 'epoch': 3} {'type': 'loss', 'content': 0.08313047140836716, 'timestamp': '2025-10-01 04:39:22.519610', 'step': 21419, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:39:22.550351', 'step': 21419, 'epoch': 3} {'type': 'loss', 'content': 0.12094318121671677, 'timestamp': '2025-10-01 04:39:22.574306', 'step': 21420, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:22.604784', 'step': 21420, 'epoch': 3} {'type': 'loss', 'content': 0.05354510620236397, 'timestamp': '2025-10-01 04:39:22.606838', 'step': 21421, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:39:22.640573', 'step': 21421, 'epoch': 3} {'type': 'loss', 'content': 0.10520409047603607, 'timestamp': '2025-10-01 04:39:22.642940', 'step': 21422, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:22.674358', 'step': 21422, 'epoch': 3} {'type': 'loss', 'content': 0.08121269941329956, 'timestamp': '2025-10-01 04:39:22.677774', 'step': 21423, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:22.708649', 'step': 21423, 'epoch': 3} {'type': 'loss', 'content': 0.0945596843957901, 'timestamp': '2025-10-01 04:39:22.732209', 'step': 21424, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:22.763060', 'step': 21424, 'epoch': 3} {'type': 'loss', 'content': 0.02185521461069584, 'timestamp': '2025-10-01 04:39:22.775638', 'step': 21425, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:22.806135', 'step': 21425, 'epoch': 3} {'type': 'loss', 'content': 0.06209398806095123, 'timestamp': '2025-10-01 04:39:22.808291', 'step': 21426, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:22.838704', 'step': 21426, 'epoch': 3} {'type': 'loss', 'content': 0.08026201277971268, 'timestamp': '2025-10-01 04:39:22.840976', 'step': 21427, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:22.872444', 'step': 21427, 'epoch': 3} {'type': 'loss', 'content': 0.05268366262316704, 'timestamp': '2025-10-01 04:39:22.896045', 'step': 21428, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:22.930518', 'step': 21428, 'epoch': 3} {'type': 'loss', 'content': 0.08163659274578094, 'timestamp': '2025-10-01 04:39:22.932667', 'step': 21429, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:22.964503', 'step': 21429, 'epoch': 3} {'type': 'loss', 'content': 0.05457744374871254, 'timestamp': '2025-10-01 04:39:22.966639', 'step': 21430, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:23.003966', 'step': 21430, 'epoch': 3} {'type': 'loss', 'content': 0.07805784046649933, 'timestamp': '2025-10-01 04:39:23.006060', 'step': 21431, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:23.036681', 'step': 21431, 'epoch': 3} {'type': 'loss', 'content': 0.09294325113296509, 'timestamp': '2025-10-01 04:39:23.060717', 'step': 21432, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:23.091952', 'step': 21432, 'epoch': 3} {'type': 'loss', 'content': 0.023404289036989212, 'timestamp': '2025-10-01 04:39:23.093973', 'step': 21433, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:23.124902', 'step': 21433, 'epoch': 3} {'type': 'loss', 'content': 0.04952511191368103, 'timestamp': '2025-10-01 04:39:23.134525', 'step': 21434, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:23.165148', 'step': 21434, 'epoch': 3} {'type': 'loss', 'content': 0.07352142035961151, 'timestamp': '2025-10-01 04:39:23.172121', 'step': 21435, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:23.202902', 'step': 21435, 'epoch': 3} {'type': 'loss', 'content': 0.09019257128238678, 'timestamp': '2025-10-01 04:39:23.226820', 'step': 21436, 'epoch': 3} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 949202279808}], 'timestamp': '2025-10-01 04:39:31.248937', 'step': 21436, 'epoch': 3} {'type': 'pplx', 'content': 10228.352193036391, 'timestamp': '2025-10-01 04:39:31.251834', 'step': 21436, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:31.280517', 'step': 21436, 'epoch': 3} {'type': 'loss', 'content': 0.04297315329313278, 'timestamp': '2025-10-01 04:39:31.282793', 'step': 21437, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:31.315300', 'step': 21437, 'epoch': 3} {'type': 'loss', 'content': 0.07443999499082565, 'timestamp': '2025-10-01 04:39:31.317224', 'step': 21438, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:31.347686', 'step': 21438, 'epoch': 3} {'type': 'loss', 'content': 0.10888469219207764, 'timestamp': '2025-10-01 04:39:31.349641', 'step': 21439, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:31.381384', 'step': 21439, 'epoch': 3} {'type': 'loss', 'content': 0.027570083737373352, 'timestamp': '2025-10-01 04:39:31.404819', 'step': 21440, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:31.436899', 'step': 21440, 'epoch': 3} {'type': 'loss', 'content': 0.06457771360874176, 'timestamp': '2025-10-01 04:39:31.438865', 'step': 21441, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:31.476834', 'step': 21441, 'epoch': 3} {'type': 'loss', 'content': 0.051627177745103836, 'timestamp': '2025-10-01 04:39:31.478845', 'step': 21442, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:31.510437', 'step': 21442, 'epoch': 3} {'type': 'loss', 'content': 0.0513678640127182, 'timestamp': '2025-10-01 04:39:31.512181', 'step': 21443, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:31.543588', 'step': 21443, 'epoch': 3} {'type': 'loss', 'content': 0.037971965968608856, 'timestamp': '2025-10-01 04:39:31.567231', 'step': 21444, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:31.599182', 'step': 21444, 'epoch': 3} {'type': 'loss', 'content': 0.06482815742492676, 'timestamp': '2025-10-01 04:39:31.601014', 'step': 21445, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:31.632587', 'step': 21445, 'epoch': 3} {'type': 'loss', 'content': 0.02051415853202343, 'timestamp': '2025-10-01 04:39:31.634448', 'step': 21446, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:31.667270', 'step': 21446, 'epoch': 3} {'type': 'loss', 'content': 0.12369952350854874, 'timestamp': '2025-10-01 04:39:31.669064', 'step': 21447, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:31.701247', 'step': 21447, 'epoch': 3} {'type': 'loss', 'content': 0.0344758965075016, 'timestamp': '2025-10-01 04:39:31.724560', 'step': 21448, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:31.756269', 'step': 21448, 'epoch': 3} {'type': 'loss', 'content': 0.0529945008456707, 'timestamp': '2025-10-01 04:39:31.758271', 'step': 21449, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:31.789690', 'step': 21449, 'epoch': 3} {'type': 'loss', 'content': 0.11370985954999924, 'timestamp': '2025-10-01 04:39:31.792161', 'step': 21450, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:31.825053', 'step': 21450, 'epoch': 3} {'type': 'loss', 'content': 0.1076810210943222, 'timestamp': '2025-10-01 04:39:31.827401', 'step': 21451, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:31.858997', 'step': 21451, 'epoch': 3} {'type': 'loss', 'content': 0.08768491446971893, 'timestamp': '2025-10-01 04:39:31.882527', 'step': 21452, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:39:31.914698', 'step': 21452, 'epoch': 3} {'type': 'loss', 'content': 0.050697650760412216, 'timestamp': '2025-10-01 04:39:31.917059', 'step': 21453, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:31.949686', 'step': 21453, 'epoch': 3} {'type': 'loss', 'content': 0.11319509893655777, 'timestamp': '2025-10-01 04:39:31.958315', 'step': 21454, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:31.990797', 'step': 21454, 'epoch': 3} {'type': 'loss', 'content': 0.0075582219287753105, 'timestamp': '2025-10-01 04:39:31.992796', 'step': 21455, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:32.025077', 'step': 21455, 'epoch': 3} {'type': 'loss', 'content': 0.05421455204486847, 'timestamp': '2025-10-01 04:39:32.048696', 'step': 21456, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:32.080040', 'step': 21456, 'epoch': 3} {'type': 'loss', 'content': 0.024763761088252068, 'timestamp': '2025-10-01 04:39:32.086153', 'step': 21457, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:32.117340', 'step': 21457, 'epoch': 3} {'type': 'loss', 'content': 0.05400780215859413, 'timestamp': '2025-10-01 04:39:32.119637', 'step': 21458, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:32.150996', 'step': 21458, 'epoch': 3} {'type': 'loss', 'content': 0.07394767552614212, 'timestamp': '2025-10-01 04:39:32.153048', 'step': 21459, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:32.184746', 'step': 21459, 'epoch': 3} {'type': 'loss', 'content': 0.04633255675435066, 'timestamp': '2025-10-01 04:39:32.208428', 'step': 21460, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:32.239289', 'step': 21460, 'epoch': 3} {'type': 'loss', 'content': 0.03509669750928879, 'timestamp': '2025-10-01 04:39:32.241773', 'step': 21461, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:32.275229', 'step': 21461, 'epoch': 3} {'type': 'loss', 'content': 0.06698682904243469, 'timestamp': '2025-10-01 04:39:32.277511', 'step': 21462, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:39:32.308825', 'step': 21462, 'epoch': 3} {'type': 'loss', 'content': 0.13808304071426392, 'timestamp': '2025-10-01 04:39:32.311039', 'step': 21463, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:32.341684', 'step': 21463, 'epoch': 3} {'type': 'loss', 'content': 0.05062461271882057, 'timestamp': '2025-10-01 04:39:32.366424', 'step': 21464, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:32.398170', 'step': 21464, 'epoch': 3} {'type': 'loss', 'content': 0.028373610228300095, 'timestamp': '2025-10-01 04:39:32.400278', 'step': 21465, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:39:32.431535', 'step': 21465, 'epoch': 3} {'type': 'loss', 'content': 0.019971191883087158, 'timestamp': '2025-10-01 04:39:32.434023', 'step': 21466, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:32.463945', 'step': 21466, 'epoch': 3} {'type': 'loss', 'content': 0.025564059615135193, 'timestamp': '2025-10-01 04:39:32.466026', 'step': 21467, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:32.497528', 'step': 21467, 'epoch': 3} {'type': 'loss', 'content': 0.09280343353748322, 'timestamp': '2025-10-01 04:39:32.524005', 'step': 21468, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:32.554081', 'step': 21468, 'epoch': 3} {'type': 'loss', 'content': 0.053334277123212814, 'timestamp': '2025-10-01 04:39:32.556184', 'step': 21469, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:32.588147', 'step': 21469, 'epoch': 3} {'type': 'loss', 'content': 0.06493683904409409, 'timestamp': '2025-10-01 04:39:32.591351', 'step': 21470, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:32.622643', 'step': 21470, 'epoch': 3} {'type': 'loss', 'content': 0.10539905726909637, 'timestamp': '2025-10-01 04:39:32.624733', 'step': 21471, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:32.655397', 'step': 21471, 'epoch': 3} {'type': 'loss', 'content': 0.06406369060277939, 'timestamp': '2025-10-01 04:39:32.679035', 'step': 21472, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:32.709569', 'step': 21472, 'epoch': 3} {'type': 'loss', 'content': 0.07176229357719421, 'timestamp': '2025-10-01 04:39:32.711536', 'step': 21473, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:32.742668', 'step': 21473, 'epoch': 3} {'type': 'loss', 'content': 0.021908676251769066, 'timestamp': '2025-10-01 04:39:32.744639', 'step': 21474, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:32.775263', 'step': 21474, 'epoch': 3} {'type': 'loss', 'content': 0.10116493701934814, 'timestamp': '2025-10-01 04:39:32.777521', 'step': 21475, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:39:32.812882', 'step': 21475, 'epoch': 3} {'type': 'loss', 'content': 0.14995649456977844, 'timestamp': '2025-10-01 04:39:32.836386', 'step': 21476, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:39:32.867064', 'step': 21476, 'epoch': 3} {'type': 'loss', 'content': 0.07786062359809875, 'timestamp': '2025-10-01 04:39:32.870107', 'step': 21477, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:39:32.900954', 'step': 21477, 'epoch': 3} {'type': 'loss', 'content': 0.04481728747487068, 'timestamp': '2025-10-01 04:39:32.905181', 'step': 21478, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:32.938897', 'step': 21478, 'epoch': 3} {'type': 'loss', 'content': 0.09160926938056946, 'timestamp': '2025-10-01 04:39:32.941302', 'step': 21479, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:32.972260', 'step': 21479, 'epoch': 3} {'type': 'loss', 'content': 0.05223923176527023, 'timestamp': '2025-10-01 04:39:32.995936', 'step': 21480, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:33.031638', 'step': 21480, 'epoch': 3} {'type': 'loss', 'content': 0.04413411393761635, 'timestamp': '2025-10-01 04:39:33.033994', 'step': 21481, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:33.064472', 'step': 21481, 'epoch': 3} {'type': 'loss', 'content': 0.058648642152547836, 'timestamp': '2025-10-01 04:39:33.066639', 'step': 21482, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:33.096807', 'step': 21482, 'epoch': 3} {'type': 'loss', 'content': 0.07086274027824402, 'timestamp': '2025-10-01 04:39:33.098963', 'step': 21483, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:33.129071', 'step': 21483, 'epoch': 3} {'type': 'loss', 'content': 0.07288733869791031, 'timestamp': '2025-10-01 04:39:33.152795', 'step': 21484, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:39:33.183042', 'step': 21484, 'epoch': 3} {'type': 'loss', 'content': 0.07846192270517349, 'timestamp': '2025-10-01 04:39:33.185167', 'step': 21485, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:33.216154', 'step': 21485, 'epoch': 3} {'type': 'loss', 'content': 0.0811246857047081, 'timestamp': '2025-10-01 04:39:33.218107', 'step': 21486, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:33.248159', 'step': 21486, 'epoch': 3} {'type': 'loss', 'content': 0.03969775140285492, 'timestamp': '2025-10-01 04:39:33.251026', 'step': 21487, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:33.282227', 'step': 21487, 'epoch': 3} {'type': 'loss', 'content': 0.06806359440088272, 'timestamp': '2025-10-01 04:39:33.305785', 'step': 21488, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:33.335643', 'step': 21488, 'epoch': 3} {'type': 'loss', 'content': 0.12483721971511841, 'timestamp': '2025-10-01 04:39:33.337715', 'step': 21489, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:39:33.368144', 'step': 21489, 'epoch': 3} {'type': 'loss', 'content': 0.07023585587739944, 'timestamp': '2025-10-01 04:39:33.371111', 'step': 21490, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:39:33.401943', 'step': 21490, 'epoch': 3} {'type': 'loss', 'content': 0.05405459180474281, 'timestamp': '2025-10-01 04:39:33.404458', 'step': 21491, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:39:33.434542', 'step': 21491, 'epoch': 3} {'type': 'loss', 'content': 0.04832298308610916, 'timestamp': '2025-10-01 04:39:33.458389', 'step': 21492, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:33.489631', 'step': 21492, 'epoch': 3} {'type': 'loss', 'content': 0.044039737433195114, 'timestamp': '2025-10-01 04:39:33.491879', 'step': 21493, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:33.522296', 'step': 21493, 'epoch': 3} {'type': 'loss', 'content': 0.07412328571081161, 'timestamp': '2025-10-01 04:39:33.528550', 'step': 21494, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:33.558597', 'step': 21494, 'epoch': 3} {'type': 'loss', 'content': 0.09671910852193832, 'timestamp': '2025-10-01 04:39:33.561414', 'step': 21495, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:39:33.600932', 'step': 21495, 'epoch': 3} {'type': 'loss', 'content': 0.06706742942333221, 'timestamp': '2025-10-01 04:39:33.624377', 'step': 21496, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:39:33.664700', 'step': 21496, 'epoch': 3} {'type': 'loss', 'content': 0.05251308158040047, 'timestamp': '2025-10-01 04:39:33.666702', 'step': 21497, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:33.698675', 'step': 21497, 'epoch': 3} {'type': 'loss', 'content': 0.054924048483371735, 'timestamp': '2025-10-01 04:39:33.701272', 'step': 21498, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:33.732197', 'step': 21498, 'epoch': 3} {'type': 'loss', 'content': 0.05168343335390091, 'timestamp': '2025-10-01 04:39:33.736128', 'step': 21499, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:33.766321', 'step': 21499, 'epoch': 3} {'type': 'loss', 'content': 0.1177622377872467, 'timestamp': '2025-10-01 04:39:33.790019', 'step': 21500, 'epoch': 3} {'type': 'info', 'content': 'Checkpoint saved at step 21500', 'timestamp': '2025-10-01 04:39:39.167873', 'step': 21500, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:39.199497', 'step': 21500, 'epoch': 3} {'type': 'loss', 'content': 0.07159677147865295, 'timestamp': '2025-10-01 04:39:39.201482', 'step': 21501, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:39.233307', 'step': 21501, 'epoch': 3} {'type': 'loss', 'content': 0.04793647676706314, 'timestamp': '2025-10-01 04:39:39.235367', 'step': 21502, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:39.269984', 'step': 21502, 'epoch': 3} {'type': 'loss', 'content': 0.025764798745512962, 'timestamp': '2025-10-01 04:39:39.272944', 'step': 21503, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:39.304579', 'step': 21503, 'epoch': 3} {'type': 'loss', 'content': 0.06310015171766281, 'timestamp': '2025-10-01 04:39:39.328286', 'step': 21504, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:39:39.362484', 'step': 21504, 'epoch': 3} {'type': 'loss', 'content': 0.10576978325843811, 'timestamp': '2025-10-01 04:39:39.366141', 'step': 21505, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:39.404926', 'step': 21505, 'epoch': 3} {'type': 'loss', 'content': 0.018482083454728127, 'timestamp': '2025-10-01 04:39:39.407367', 'step': 21506, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:39:39.439133', 'step': 21506, 'epoch': 3} {'type': 'loss', 'content': 0.05769459158182144, 'timestamp': '2025-10-01 04:39:39.442115', 'step': 21507, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:39.474049', 'step': 21507, 'epoch': 3} {'type': 'loss', 'content': 0.097386434674263, 'timestamp': '2025-10-01 04:39:39.497690', 'step': 21508, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:39.528388', 'step': 21508, 'epoch': 3} {'type': 'loss', 'content': 0.06205269694328308, 'timestamp': '2025-10-01 04:39:39.530416', 'step': 21509, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:39:39.561645', 'step': 21509, 'epoch': 3} {'type': 'loss', 'content': 0.039706625044345856, 'timestamp': '2025-10-01 04:39:39.564061', 'step': 21510, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:39:39.595321', 'step': 21510, 'epoch': 3} {'type': 'loss', 'content': 0.0432695709168911, 'timestamp': '2025-10-01 04:39:39.597774', 'step': 21511, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:39:39.628435', 'step': 21511, 'epoch': 3} {'type': 'loss', 'content': 0.12158683687448502, 'timestamp': '2025-10-01 04:39:39.652325', 'step': 21512, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:39.683829', 'step': 21512, 'epoch': 3} {'type': 'loss', 'content': 0.04825623333454132, 'timestamp': '2025-10-01 04:39:39.685790', 'step': 21513, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:39.716903', 'step': 21513, 'epoch': 3} {'type': 'loss', 'content': 0.07309077680110931, 'timestamp': '2025-10-01 04:39:39.718919', 'step': 21514, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:39.749821', 'step': 21514, 'epoch': 3} {'type': 'loss', 'content': 0.046642597764730453, 'timestamp': '2025-10-01 04:39:39.752037', 'step': 21515, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:39:39.785699', 'step': 21515, 'epoch': 3} {'type': 'loss', 'content': 0.07031406462192535, 'timestamp': '2025-10-01 04:39:39.809071', 'step': 21516, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:39.840426', 'step': 21516, 'epoch': 3} {'type': 'loss', 'content': 0.054505832493305206, 'timestamp': '2025-10-01 04:39:39.842636', 'step': 21517, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:39.874706', 'step': 21517, 'epoch': 3} {'type': 'loss', 'content': 0.015390967950224876, 'timestamp': '2025-10-01 04:39:39.876838', 'step': 21518, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:39:39.907451', 'step': 21518, 'epoch': 3} {'type': 'loss', 'content': 0.03687102720141411, 'timestamp': '2025-10-01 04:39:39.909757', 'step': 21519, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:39.942255', 'step': 21519, 'epoch': 3} {'type': 'loss', 'content': 0.04189680516719818, 'timestamp': '2025-10-01 04:39:39.965708', 'step': 21520, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:39.996285', 'step': 21520, 'epoch': 3} {'type': 'loss', 'content': 0.028048736974596977, 'timestamp': '2025-10-01 04:39:39.998326', 'step': 21521, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:40.030630', 'step': 21521, 'epoch': 3} {'type': 'loss', 'content': 0.07260755449533463, 'timestamp': '2025-10-01 04:39:40.033358', 'step': 21522, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:39:40.065348', 'step': 21522, 'epoch': 3} {'type': 'loss', 'content': 0.10105164349079132, 'timestamp': '2025-10-01 04:39:40.067753', 'step': 21523, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:40.100152', 'step': 21523, 'epoch': 3} {'type': 'loss', 'content': 0.10154172778129578, 'timestamp': '2025-10-01 04:39:40.123740', 'step': 21524, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:40.158086', 'step': 21524, 'epoch': 3} {'type': 'loss', 'content': 0.03252760320901871, 'timestamp': '2025-10-01 04:39:40.160184', 'step': 21525, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:40.191248', 'step': 21525, 'epoch': 3} {'type': 'loss', 'content': 0.0215766578912735, 'timestamp': '2025-10-01 04:39:40.194445', 'step': 21526, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:39:40.225306', 'step': 21526, 'epoch': 3} {'type': 'loss', 'content': 0.03039468824863434, 'timestamp': '2025-10-01 04:39:40.227961', 'step': 21527, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:40.258716', 'step': 21527, 'epoch': 3} {'type': 'loss', 'content': 0.05882050096988678, 'timestamp': '2025-10-01 04:39:40.283224', 'step': 21528, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:40.314190', 'step': 21528, 'epoch': 3} {'type': 'loss', 'content': 0.03079102747142315, 'timestamp': '2025-10-01 04:39:40.316765', 'step': 21529, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:40.348559', 'step': 21529, 'epoch': 3} {'type': 'loss', 'content': 0.07227455079555511, 'timestamp': '2025-10-01 04:39:40.350740', 'step': 21530, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:40.381954', 'step': 21530, 'epoch': 3} {'type': 'loss', 'content': 0.049267545342445374, 'timestamp': '2025-10-01 04:39:40.383950', 'step': 21531, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:40.415016', 'step': 21531, 'epoch': 3} {'type': 'loss', 'content': 0.05094140022993088, 'timestamp': '2025-10-01 04:39:40.438556', 'step': 21532, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:40.470143', 'step': 21532, 'epoch': 3} {'type': 'loss', 'content': 0.05845806375145912, 'timestamp': '2025-10-01 04:39:40.472313', 'step': 21533, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:40.503552', 'step': 21533, 'epoch': 3} {'type': 'loss', 'content': 0.06665560603141785, 'timestamp': '2025-10-01 04:39:40.505889', 'step': 21534, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:40.536288', 'step': 21534, 'epoch': 3} {'type': 'loss', 'content': 0.031552139669656754, 'timestamp': '2025-10-01 04:39:40.538587', 'step': 21535, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:40.570651', 'step': 21535, 'epoch': 3} {'type': 'loss', 'content': 0.0852009505033493, 'timestamp': '2025-10-01 04:39:40.594241', 'step': 21536, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:40.624547', 'step': 21536, 'epoch': 3} {'type': 'loss', 'content': 0.08173536509275436, 'timestamp': '2025-10-01 04:39:40.626638', 'step': 21537, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:40.657439', 'step': 21537, 'epoch': 3} {'type': 'loss', 'content': 0.011106736026704311, 'timestamp': '2025-10-01 04:39:40.659453', 'step': 21538, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:40.689774', 'step': 21538, 'epoch': 3} {'type': 'loss', 'content': 0.08204170316457748, 'timestamp': '2025-10-01 04:39:40.695515', 'step': 21539, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:40.728596', 'step': 21539, 'epoch': 3} {'type': 'loss', 'content': 0.020603124052286148, 'timestamp': '2025-10-01 04:39:40.752108', 'step': 21540, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:40.783527', 'step': 21540, 'epoch': 3} {'type': 'loss', 'content': 0.14026910066604614, 'timestamp': '2025-10-01 04:39:40.785648', 'step': 21541, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:40.818899', 'step': 21541, 'epoch': 3} {'type': 'loss', 'content': 0.09114620089530945, 'timestamp': '2025-10-01 04:39:40.820911', 'step': 21542, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:40.852022', 'step': 21542, 'epoch': 3} {'type': 'loss', 'content': 0.05701989680528641, 'timestamp': '2025-10-01 04:39:40.855149', 'step': 21543, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:40.885544', 'step': 21543, 'epoch': 3} {'type': 'loss', 'content': 0.07693864405155182, 'timestamp': '2025-10-01 04:39:40.909241', 'step': 21544, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:40.940037', 'step': 21544, 'epoch': 3} {'type': 'loss', 'content': 0.14371366798877716, 'timestamp': '2025-10-01 04:39:40.942108', 'step': 21545, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:40.973039', 'step': 21545, 'epoch': 3} {'type': 'loss', 'content': 0.0339532271027565, 'timestamp': '2025-10-01 04:39:40.975175', 'step': 21546, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:41.010391', 'step': 21546, 'epoch': 3} {'type': 'loss', 'content': 0.019095581024885178, 'timestamp': '2025-10-01 04:39:41.012469', 'step': 21547, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:41.043361', 'step': 21547, 'epoch': 3} {'type': 'loss', 'content': 0.06645771861076355, 'timestamp': '2025-10-01 04:39:41.067357', 'step': 21548, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:41.099257', 'step': 21548, 'epoch': 3} {'type': 'loss', 'content': 0.11035549640655518, 'timestamp': '2025-10-01 04:39:41.102811', 'step': 21549, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:41.134962', 'step': 21549, 'epoch': 3} {'type': 'loss', 'content': 0.06953660398721695, 'timestamp': '2025-10-01 04:39:41.137275', 'step': 21550, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:41.169006', 'step': 21550, 'epoch': 3} {'type': 'loss', 'content': 0.049312490969896317, 'timestamp': '2025-10-01 04:39:41.171156', 'step': 21551, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:41.201543', 'step': 21551, 'epoch': 3} {'type': 'loss', 'content': 0.028374984860420227, 'timestamp': '2025-10-01 04:39:41.224990', 'step': 21552, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:41.256356', 'step': 21552, 'epoch': 3} {'type': 'loss', 'content': 0.058547575026750565, 'timestamp': '2025-10-01 04:39:41.258751', 'step': 21553, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:41.290790', 'step': 21553, 'epoch': 3} {'type': 'loss', 'content': 0.04443511739373207, 'timestamp': '2025-10-01 04:39:41.293223', 'step': 21554, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:41.330805', 'step': 21554, 'epoch': 3} {'type': 'loss', 'content': 0.08662113547325134, 'timestamp': '2025-10-01 04:39:41.332868', 'step': 21555, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:41.364060', 'step': 21555, 'epoch': 3} {'type': 'loss', 'content': 0.055524349212646484, 'timestamp': '2025-10-01 04:39:41.387630', 'step': 21556, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:41.418676', 'step': 21556, 'epoch': 3} {'type': 'loss', 'content': 0.0168545413762331, 'timestamp': '2025-10-01 04:39:41.420788', 'step': 21557, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:41.451299', 'step': 21557, 'epoch': 3} {'type': 'loss', 'content': 0.06415683031082153, 'timestamp': '2025-10-01 04:39:41.453326', 'step': 21558, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:41.489470', 'step': 21558, 'epoch': 3} {'type': 'loss', 'content': 0.08130699396133423, 'timestamp': '2025-10-01 04:39:41.492254', 'step': 21559, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:41.524837', 'step': 21559, 'epoch': 3} {'type': 'loss', 'content': 0.09994988143444061, 'timestamp': '2025-10-01 04:39:41.548343', 'step': 21560, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:41.579032', 'step': 21560, 'epoch': 3} {'type': 'loss', 'content': 0.08707480877637863, 'timestamp': '2025-10-01 04:39:41.581075', 'step': 21561, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:41.611850', 'step': 21561, 'epoch': 3} {'type': 'loss', 'content': 0.02345292828977108, 'timestamp': '2025-10-01 04:39:41.613911', 'step': 21562, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:39:41.644235', 'step': 21562, 'epoch': 3} {'type': 'loss', 'content': 0.12745822966098785, 'timestamp': '2025-10-01 04:39:41.648440', 'step': 21563, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:41.679221', 'step': 21563, 'epoch': 3} {'type': 'loss', 'content': 0.06815649569034576, 'timestamp': '2025-10-01 04:39:41.702691', 'step': 21564, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:41.733038', 'step': 21564, 'epoch': 3} {'type': 'loss', 'content': 0.043555065989494324, 'timestamp': '2025-10-01 04:39:41.735270', 'step': 21565, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:41.766122', 'step': 21565, 'epoch': 3} {'type': 'loss', 'content': 0.03521237522363663, 'timestamp': '2025-10-01 04:39:41.768425', 'step': 21566, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:39:41.800412', 'step': 21566, 'epoch': 3} {'type': 'loss', 'content': 0.03472773730754852, 'timestamp': '2025-10-01 04:39:41.802543', 'step': 21567, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:41.837019', 'step': 21567, 'epoch': 3} {'type': 'loss', 'content': 0.027300773188471794, 'timestamp': '2025-10-01 04:39:41.860554', 'step': 21568, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:39:41.892433', 'step': 21568, 'epoch': 3} {'type': 'loss', 'content': 0.17161770164966583, 'timestamp': '2025-10-01 04:39:41.894548', 'step': 21569, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:41.925138', 'step': 21569, 'epoch': 3} {'type': 'loss', 'content': 0.020339740440249443, 'timestamp': '2025-10-01 04:39:41.927096', 'step': 21570, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:41.960288', 'step': 21570, 'epoch': 3} {'type': 'loss', 'content': 0.022731205448508263, 'timestamp': '2025-10-01 04:39:41.962385', 'step': 21571, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:39:41.993912', 'step': 21571, 'epoch': 3} {'type': 'loss', 'content': 0.06764848530292511, 'timestamp': '2025-10-01 04:39:42.017617', 'step': 21572, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:42.050043', 'step': 21572, 'epoch': 3} {'type': 'loss', 'content': 0.04304400831460953, 'timestamp': '2025-10-01 04:39:42.052090', 'step': 21573, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:42.083976', 'step': 21573, 'epoch': 3} {'type': 'loss', 'content': 0.04603301361203194, 'timestamp': '2025-10-01 04:39:42.086135', 'step': 21574, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:42.116414', 'step': 21574, 'epoch': 3} {'type': 'loss', 'content': 0.10137674957513809, 'timestamp': '2025-10-01 04:39:42.118493', 'step': 21575, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:42.148799', 'step': 21575, 'epoch': 3} {'type': 'loss', 'content': 0.01702205277979374, 'timestamp': '2025-10-01 04:39:42.172988', 'step': 21576, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:42.202933', 'step': 21576, 'epoch': 3} {'type': 'loss', 'content': 0.07061365246772766, 'timestamp': '2025-10-01 04:39:42.205021', 'step': 21577, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:39:42.235710', 'step': 21577, 'epoch': 3} {'type': 'loss', 'content': 0.1296943873167038, 'timestamp': '2025-10-01 04:39:42.244526', 'step': 21578, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:42.274270', 'step': 21578, 'epoch': 3} {'type': 'loss', 'content': 0.02834790013730526, 'timestamp': '2025-10-01 04:39:42.276436', 'step': 21579, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:42.306831', 'step': 21579, 'epoch': 3} {'type': 'loss', 'content': 0.0875442773103714, 'timestamp': '2025-10-01 04:39:42.330814', 'step': 21580, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:42.361692', 'step': 21580, 'epoch': 3} {'type': 'loss', 'content': 0.020101696252822876, 'timestamp': '2025-10-01 04:39:42.364589', 'step': 21581, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:39:42.394897', 'step': 21581, 'epoch': 3} {'type': 'loss', 'content': 0.0171788539737463, 'timestamp': '2025-10-01 04:39:42.397616', 'step': 21582, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:42.429074', 'step': 21582, 'epoch': 3} {'type': 'loss', 'content': 0.015703774988651276, 'timestamp': '2025-10-01 04:39:42.431333', 'step': 21583, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:42.462307', 'step': 21583, 'epoch': 3} {'type': 'loss', 'content': 0.05948011204600334, 'timestamp': '2025-10-01 04:39:42.486246', 'step': 21584, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:42.516720', 'step': 21584, 'epoch': 3} {'type': 'loss', 'content': 0.03084389492869377, 'timestamp': '2025-10-01 04:39:42.520750', 'step': 21585, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:42.551290', 'step': 21585, 'epoch': 3} {'type': 'loss', 'content': 0.022517258301377296, 'timestamp': '2025-10-01 04:39:42.553599', 'step': 21586, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:42.583377', 'step': 21586, 'epoch': 3} {'type': 'loss', 'content': 0.18849053978919983, 'timestamp': '2025-10-01 04:39:42.585781', 'step': 21587, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:42.616994', 'step': 21587, 'epoch': 3} {'type': 'loss', 'content': 0.02576470375061035, 'timestamp': '2025-10-01 04:39:42.641016', 'step': 21588, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:42.671475', 'step': 21588, 'epoch': 3} {'type': 'loss', 'content': 0.028400583192706108, 'timestamp': '2025-10-01 04:39:42.673836', 'step': 21589, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:42.704653', 'step': 21589, 'epoch': 3} {'type': 'loss', 'content': 0.06707817316055298, 'timestamp': '2025-10-01 04:39:42.707067', 'step': 21590, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:42.738798', 'step': 21590, 'epoch': 3} {'type': 'loss', 'content': 0.008515491150319576, 'timestamp': '2025-10-01 04:39:42.742176', 'step': 21591, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:42.773337', 'step': 21591, 'epoch': 3} {'type': 'loss', 'content': 0.05102952569723129, 'timestamp': '2025-10-01 04:39:42.797321', 'step': 21592, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:42.827807', 'step': 21592, 'epoch': 3} {'type': 'loss', 'content': 0.05184812471270561, 'timestamp': '2025-10-01 04:39:42.830221', 'step': 21593, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:42.861025', 'step': 21593, 'epoch': 3} {'type': 'loss', 'content': 0.10682133585214615, 'timestamp': '2025-10-01 04:39:42.866493', 'step': 21594, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:42.905728', 'step': 21594, 'epoch': 3} {'type': 'loss', 'content': 0.07520437240600586, 'timestamp': '2025-10-01 04:39:42.907816', 'step': 21595, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:42.943315', 'step': 21595, 'epoch': 3} {'type': 'loss', 'content': 0.04584955424070358, 'timestamp': '2025-10-01 04:39:42.969049', 'step': 21596, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:39:43.001851', 'step': 21596, 'epoch': 3} {'type': 'loss', 'content': 0.03553313389420509, 'timestamp': '2025-10-01 04:39:43.007659', 'step': 21597, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:43.041805', 'step': 21597, 'epoch': 3} {'type': 'loss', 'content': 0.040102768689394, 'timestamp': '2025-10-01 04:39:43.044435', 'step': 21598, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:43.075758', 'step': 21598, 'epoch': 3} {'type': 'loss', 'content': 0.09906961023807526, 'timestamp': '2025-10-01 04:39:43.081481', 'step': 21599, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:39:43.114103', 'step': 21599, 'epoch': 3} {'type': 'loss', 'content': 0.05894774571061134, 'timestamp': '2025-10-01 04:39:43.138786', 'step': 21600, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:43.169887', 'step': 21600, 'epoch': 3} {'type': 'loss', 'content': 0.014753921888768673, 'timestamp': '2025-10-01 04:39:43.172303', 'step': 21601, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:43.203747', 'step': 21601, 'epoch': 3} {'type': 'loss', 'content': 0.06267540901899338, 'timestamp': '2025-10-01 04:39:43.206122', 'step': 21602, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:43.238028', 'step': 21602, 'epoch': 3} {'type': 'loss', 'content': 0.06428162008523941, 'timestamp': '2025-10-01 04:39:43.240761', 'step': 21603, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:43.272351', 'step': 21603, 'epoch': 3} {'type': 'loss', 'content': 0.06044387072324753, 'timestamp': '2025-10-01 04:39:43.295923', 'step': 21604, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:39:43.326373', 'step': 21604, 'epoch': 3} {'type': 'loss', 'content': 0.08078335970640182, 'timestamp': '2025-10-01 04:39:43.329381', 'step': 21605, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:43.364636', 'step': 21605, 'epoch': 3} {'type': 'loss', 'content': 0.015804318711161613, 'timestamp': '2025-10-01 04:39:43.366630', 'step': 21606, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:43.398485', 'step': 21606, 'epoch': 3} {'type': 'loss', 'content': 0.03345922380685806, 'timestamp': '2025-10-01 04:39:43.400793', 'step': 21607, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:43.435618', 'step': 21607, 'epoch': 3} {'type': 'loss', 'content': 0.06654051691293716, 'timestamp': '2025-10-01 04:39:43.459690', 'step': 21608, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:43.489717', 'step': 21608, 'epoch': 3} {'type': 'loss', 'content': 0.10620132833719254, 'timestamp': '2025-10-01 04:39:43.492378', 'step': 21609, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:39:43.523314', 'step': 21609, 'epoch': 3} {'type': 'loss', 'content': 0.11598960310220718, 'timestamp': '2025-10-01 04:39:43.525860', 'step': 21610, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:43.558852', 'step': 21610, 'epoch': 3} {'type': 'loss', 'content': 0.05936284735798836, 'timestamp': '2025-10-01 04:39:43.560997', 'step': 21611, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:43.591466', 'step': 21611, 'epoch': 3} {'type': 'loss', 'content': 0.025087343528866768, 'timestamp': '2025-10-01 04:39:43.615297', 'step': 21612, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:43.645799', 'step': 21612, 'epoch': 3} {'type': 'loss', 'content': 0.05427844077348709, 'timestamp': '2025-10-01 04:39:43.648153', 'step': 21613, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:43.678569', 'step': 21613, 'epoch': 3} {'type': 'loss', 'content': 0.07361090928316116, 'timestamp': '2025-10-01 04:39:43.681275', 'step': 21614, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:43.715005', 'step': 21614, 'epoch': 3} {'type': 'loss', 'content': 0.036670420318841934, 'timestamp': '2025-10-01 04:39:43.717082', 'step': 21615, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:39:43.751558', 'step': 21615, 'epoch': 3} {'type': 'loss', 'content': 0.0709129348397255, 'timestamp': '2025-10-01 04:39:43.775114', 'step': 21616, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:43.809313', 'step': 21616, 'epoch': 3} {'type': 'loss', 'content': 0.06494349241256714, 'timestamp': '2025-10-01 04:39:43.811432', 'step': 21617, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:43.843948', 'step': 21617, 'epoch': 3} {'type': 'loss', 'content': 0.08943486958742142, 'timestamp': '2025-10-01 04:39:43.846068', 'step': 21618, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:39:43.878227', 'step': 21618, 'epoch': 3} {'type': 'loss', 'content': 0.09526825696229935, 'timestamp': '2025-10-01 04:39:43.882546', 'step': 21619, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:43.920474', 'step': 21619, 'epoch': 3} {'type': 'loss', 'content': 0.020856866613030434, 'timestamp': '2025-10-01 04:39:43.944308', 'step': 21620, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:43.975151', 'step': 21620, 'epoch': 3} {'type': 'loss', 'content': 0.10860559344291687, 'timestamp': '2025-10-01 04:39:43.977319', 'step': 21621, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:44.008599', 'step': 21621, 'epoch': 3} {'type': 'loss', 'content': 0.06874371320009232, 'timestamp': '2025-10-01 04:39:44.014051', 'step': 21622, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:39:44.044664', 'step': 21622, 'epoch': 3} {'type': 'loss', 'content': 0.04581304267048836, 'timestamp': '2025-10-01 04:39:44.047051', 'step': 21623, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:44.078372', 'step': 21623, 'epoch': 3} {'type': 'loss', 'content': 0.053682636469602585, 'timestamp': '2025-10-01 04:39:44.103306', 'step': 21624, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:44.133896', 'step': 21624, 'epoch': 3} {'type': 'loss', 'content': 0.1285688281059265, 'timestamp': '2025-10-01 04:39:44.136983', 'step': 21625, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:39:44.167319', 'step': 21625, 'epoch': 3} {'type': 'loss', 'content': 0.06806416809558868, 'timestamp': '2025-10-01 04:39:44.171548', 'step': 21626, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:44.201782', 'step': 21626, 'epoch': 3} {'type': 'loss', 'content': 0.07144368439912796, 'timestamp': '2025-10-01 04:39:44.203953', 'step': 21627, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:44.234078', 'step': 21627, 'epoch': 3} {'type': 'loss', 'content': 0.03776061534881592, 'timestamp': '2025-10-01 04:39:44.257767', 'step': 21628, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:44.287923', 'step': 21628, 'epoch': 3} {'type': 'loss', 'content': 0.018055785447359085, 'timestamp': '2025-10-01 04:39:44.289962', 'step': 21629, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:44.319975', 'step': 21629, 'epoch': 3} {'type': 'loss', 'content': 0.04789024591445923, 'timestamp': '2025-10-01 04:39:44.322011', 'step': 21630, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:39:44.352387', 'step': 21630, 'epoch': 3} {'type': 'loss', 'content': 0.13859470188617706, 'timestamp': '2025-10-01 04:39:44.354461', 'step': 21631, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:44.384519', 'step': 21631, 'epoch': 3} {'type': 'loss', 'content': 0.05857403948903084, 'timestamp': '2025-10-01 04:39:44.408197', 'step': 21632, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:44.439155', 'step': 21632, 'epoch': 3} {'type': 'loss', 'content': 0.050592631101608276, 'timestamp': '2025-10-01 04:39:44.441082', 'step': 21633, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:44.471657', 'step': 21633, 'epoch': 3} {'type': 'loss', 'content': 0.09839777648448944, 'timestamp': '2025-10-01 04:39:44.473776', 'step': 21634, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:39:44.504194', 'step': 21634, 'epoch': 3} {'type': 'loss', 'content': 0.05008150637149811, 'timestamp': '2025-10-01 04:39:44.506667', 'step': 21635, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:44.537382', 'step': 21635, 'epoch': 3} {'type': 'loss', 'content': 0.1015469878911972, 'timestamp': '2025-10-01 04:39:44.561070', 'step': 21636, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-10-01 04:39:44.591572', 'step': 21636, 'epoch': 3} {'type': 'loss', 'content': 0.06429778784513474, 'timestamp': '2025-10-01 04:39:44.596708', 'step': 21637, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:44.628347', 'step': 21637, 'epoch': 3} {'type': 'loss', 'content': 0.05540447682142258, 'timestamp': '2025-10-01 04:39:44.630402', 'step': 21638, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:39:44.661540', 'step': 21638, 'epoch': 3} {'type': 'loss', 'content': 0.07435231655836105, 'timestamp': '2025-10-01 04:39:44.664073', 'step': 21639, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:44.699506', 'step': 21639, 'epoch': 3} {'type': 'loss', 'content': 0.06129445135593414, 'timestamp': '2025-10-01 04:39:44.722981', 'step': 21640, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:44.752564', 'step': 21640, 'epoch': 3} {'type': 'loss', 'content': 0.04044802114367485, 'timestamp': '2025-10-01 04:39:44.754721', 'step': 21641, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:44.784544', 'step': 21641, 'epoch': 3} {'type': 'loss', 'content': 0.06530077010393143, 'timestamp': '2025-10-01 04:39:44.786671', 'step': 21642, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:44.816615', 'step': 21642, 'epoch': 3} {'type': 'loss', 'content': 0.045718707144260406, 'timestamp': '2025-10-01 04:39:44.818854', 'step': 21643, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:44.849287', 'step': 21643, 'epoch': 3} {'type': 'loss', 'content': 0.04790205508470535, 'timestamp': '2025-10-01 04:39:44.872756', 'step': 21644, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:44.903083', 'step': 21644, 'epoch': 3} {'type': 'loss', 'content': 0.022759847342967987, 'timestamp': '2025-10-01 04:39:44.906762', 'step': 21645, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:44.936985', 'step': 21645, 'epoch': 3} {'type': 'loss', 'content': 0.03858322650194168, 'timestamp': '2025-10-01 04:39:44.939829', 'step': 21646, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:44.976048', 'step': 21646, 'epoch': 3} {'type': 'loss', 'content': 0.03528899326920509, 'timestamp': '2025-10-01 04:39:44.978128', 'step': 21647, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:45.009029', 'step': 21647, 'epoch': 3} {'type': 'loss', 'content': 0.05523372441530228, 'timestamp': '2025-10-01 04:39:45.032592', 'step': 21648, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:45.062884', 'step': 21648, 'epoch': 3} {'type': 'loss', 'content': 0.0563935786485672, 'timestamp': '2025-10-01 04:39:45.065058', 'step': 21649, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:45.097842', 'step': 21649, 'epoch': 3} {'type': 'loss', 'content': 0.09454032778739929, 'timestamp': '2025-10-01 04:39:45.102227', 'step': 21650, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:45.133616', 'step': 21650, 'epoch': 3} {'type': 'loss', 'content': 0.07808513939380646, 'timestamp': '2025-10-01 04:39:45.135745', 'step': 21651, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:45.166034', 'step': 21651, 'epoch': 3} {'type': 'loss', 'content': 0.058418791741132736, 'timestamp': '2025-10-01 04:39:45.191211', 'step': 21652, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:45.221807', 'step': 21652, 'epoch': 3} {'type': 'loss', 'content': 0.030658263713121414, 'timestamp': '2025-10-01 04:39:45.228960', 'step': 21653, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:45.260101', 'step': 21653, 'epoch': 3} {'type': 'loss', 'content': 0.003928692080080509, 'timestamp': '2025-10-01 04:39:45.262829', 'step': 21654, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:45.292960', 'step': 21654, 'epoch': 3} {'type': 'loss', 'content': 0.020207513123750687, 'timestamp': '2025-10-01 04:39:45.295118', 'step': 21655, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:45.327792', 'step': 21655, 'epoch': 3} {'type': 'loss', 'content': 0.06907213479280472, 'timestamp': '2025-10-01 04:39:45.351842', 'step': 21656, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:45.381987', 'step': 21656, 'epoch': 3} {'type': 'loss', 'content': 0.06500624120235443, 'timestamp': '2025-10-01 04:39:45.384085', 'step': 21657, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:45.414395', 'step': 21657, 'epoch': 3} {'type': 'loss', 'content': 0.09929777681827545, 'timestamp': '2025-10-01 04:39:45.417023', 'step': 21658, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:45.447622', 'step': 21658, 'epoch': 3} {'type': 'loss', 'content': 0.03065875545144081, 'timestamp': '2025-10-01 04:39:45.449800', 'step': 21659, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:45.480315', 'step': 21659, 'epoch': 3} {'type': 'loss', 'content': 0.07269712537527084, 'timestamp': '2025-10-01 04:39:45.504396', 'step': 21660, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:45.534417', 'step': 21660, 'epoch': 3} {'type': 'loss', 'content': 0.07888936996459961, 'timestamp': '2025-10-01 04:39:45.536654', 'step': 21661, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:39:45.568381', 'step': 21661, 'epoch': 3} {'type': 'loss', 'content': 0.045610859990119934, 'timestamp': '2025-10-01 04:39:45.571333', 'step': 21662, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:45.606040', 'step': 21662, 'epoch': 3} {'type': 'loss', 'content': 0.07347944378852844, 'timestamp': '2025-10-01 04:39:45.608296', 'step': 21663, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:45.639246', 'step': 21663, 'epoch': 3} {'type': 'loss', 'content': 0.04946361109614372, 'timestamp': '2025-10-01 04:39:45.662964', 'step': 21664, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:45.693973', 'step': 21664, 'epoch': 3} {'type': 'loss', 'content': 0.030446045100688934, 'timestamp': '2025-10-01 04:39:45.696951', 'step': 21665, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:45.728788', 'step': 21665, 'epoch': 3} {'type': 'loss', 'content': 0.06222778558731079, 'timestamp': '2025-10-01 04:39:45.731093', 'step': 21666, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:45.765147', 'step': 21666, 'epoch': 3} {'type': 'loss', 'content': 0.07484278827905655, 'timestamp': '2025-10-01 04:39:45.768339', 'step': 21667, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:45.798464', 'step': 21667, 'epoch': 3} {'type': 'loss', 'content': 0.06307919323444366, 'timestamp': '2025-10-01 04:39:45.822255', 'step': 21668, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:45.852928', 'step': 21668, 'epoch': 3} {'type': 'loss', 'content': 0.04582137614488602, 'timestamp': '2025-10-01 04:39:45.855626', 'step': 21669, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:45.886182', 'step': 21669, 'epoch': 3} {'type': 'loss', 'content': 0.12668880820274353, 'timestamp': '2025-10-01 04:39:45.888429', 'step': 21670, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:45.918713', 'step': 21670, 'epoch': 3} {'type': 'loss', 'content': 0.01954578422009945, 'timestamp': '2025-10-01 04:39:45.921067', 'step': 21671, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:45.951408', 'step': 21671, 'epoch': 3} {'type': 'loss', 'content': 0.08474268019199371, 'timestamp': '2025-10-01 04:39:45.975663', 'step': 21672, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:46.005949', 'step': 21672, 'epoch': 3} {'type': 'loss', 'content': 0.054387353360652924, 'timestamp': '2025-10-01 04:39:46.008435', 'step': 21673, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:46.039128', 'step': 21673, 'epoch': 3} {'type': 'loss', 'content': 0.028813518583774567, 'timestamp': '2025-10-01 04:39:46.041389', 'step': 21674, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:46.080748', 'step': 21674, 'epoch': 3} {'type': 'loss', 'content': 0.032527122646570206, 'timestamp': '2025-10-01 04:39:46.083647', 'step': 21675, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:39:46.114527', 'step': 21675, 'epoch': 3} {'type': 'loss', 'content': 0.0755997747182846, 'timestamp': '2025-10-01 04:39:46.138798', 'step': 21676, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:46.169284', 'step': 21676, 'epoch': 3} {'type': 'loss', 'content': 0.08780617266893387, 'timestamp': '2025-10-01 04:39:46.171346', 'step': 21677, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:46.201806', 'step': 21677, 'epoch': 3} {'type': 'loss', 'content': 0.02395908161997795, 'timestamp': '2025-10-01 04:39:46.203989', 'step': 21678, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:46.234116', 'step': 21678, 'epoch': 3} {'type': 'loss', 'content': 0.07020871341228485, 'timestamp': '2025-10-01 04:39:46.236757', 'step': 21679, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:46.268052', 'step': 21679, 'epoch': 3} {'type': 'loss', 'content': 0.05338989943265915, 'timestamp': '2025-10-01 04:39:46.291651', 'step': 21680, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:46.322573', 'step': 21680, 'epoch': 3} {'type': 'loss', 'content': 0.12555357813835144, 'timestamp': '2025-10-01 04:39:46.324790', 'step': 21681, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:46.355372', 'step': 21681, 'epoch': 3} {'type': 'loss', 'content': 0.1072077676653862, 'timestamp': '2025-10-01 04:39:46.357561', 'step': 21682, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:39:46.388485', 'step': 21682, 'epoch': 3} {'type': 'loss', 'content': 0.039762843400239944, 'timestamp': '2025-10-01 04:39:46.390754', 'step': 21683, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:46.425847', 'step': 21683, 'epoch': 3} {'type': 'loss', 'content': 0.05393445864319801, 'timestamp': '2025-10-01 04:39:46.449514', 'step': 21684, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:46.480390', 'step': 21684, 'epoch': 3} {'type': 'loss', 'content': 0.10920248180627823, 'timestamp': '2025-10-01 04:39:46.482506', 'step': 21685, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:46.513617', 'step': 21685, 'epoch': 3} {'type': 'loss', 'content': 0.1344400942325592, 'timestamp': '2025-10-01 04:39:46.515650', 'step': 21686, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:46.545519', 'step': 21686, 'epoch': 3} {'type': 'loss', 'content': 0.061312898993492126, 'timestamp': '2025-10-01 04:39:46.547732', 'step': 21687, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:46.578268', 'step': 21687, 'epoch': 3} {'type': 'loss', 'content': 0.08191270381212234, 'timestamp': '2025-10-01 04:39:46.601932', 'step': 21688, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:46.632740', 'step': 21688, 'epoch': 3} {'type': 'loss', 'content': 0.023181961849331856, 'timestamp': '2025-10-01 04:39:46.634995', 'step': 21689, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:46.667042', 'step': 21689, 'epoch': 3} {'type': 'loss', 'content': 0.08425537496805191, 'timestamp': '2025-10-01 04:39:46.669654', 'step': 21690, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:39:46.701128', 'step': 21690, 'epoch': 3} {'type': 'loss', 'content': 0.06998339295387268, 'timestamp': '2025-10-01 04:39:46.703470', 'step': 21691, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:46.736876', 'step': 21691, 'epoch': 3} {'type': 'loss', 'content': 0.08871393650770187, 'timestamp': '2025-10-01 04:39:46.760363', 'step': 21692, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:39:46.791282', 'step': 21692, 'epoch': 3} {'type': 'loss', 'content': 0.03156837075948715, 'timestamp': '2025-10-01 04:39:46.793482', 'step': 21693, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:46.828803', 'step': 21693, 'epoch': 3} {'type': 'loss', 'content': 0.06236517056822777, 'timestamp': '2025-10-01 04:39:46.831309', 'step': 21694, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:46.863458', 'step': 21694, 'epoch': 3} {'type': 'loss', 'content': 0.03172693029046059, 'timestamp': '2025-10-01 04:39:46.865626', 'step': 21695, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:46.896562', 'step': 21695, 'epoch': 3} {'type': 'loss', 'content': 0.06618449091911316, 'timestamp': '2025-10-01 04:39:46.921720', 'step': 21696, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:46.952795', 'step': 21696, 'epoch': 3} {'type': 'loss', 'content': 0.042662013322114944, 'timestamp': '2025-10-01 04:39:46.954945', 'step': 21697, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:46.985276', 'step': 21697, 'epoch': 3} {'type': 'loss', 'content': 0.03580683469772339, 'timestamp': '2025-10-01 04:39:46.990453', 'step': 21698, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:47.021805', 'step': 21698, 'epoch': 3} {'type': 'loss', 'content': 0.03974064439535141, 'timestamp': '2025-10-01 04:39:47.024923', 'step': 21699, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:47.055330', 'step': 21699, 'epoch': 3} {'type': 'loss', 'content': 0.06870600581169128, 'timestamp': '2025-10-01 04:39:47.079047', 'step': 21700, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:47.110515', 'step': 21700, 'epoch': 3} {'type': 'loss', 'content': 0.01996329426765442, 'timestamp': '2025-10-01 04:39:47.112779', 'step': 21701, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:47.143283', 'step': 21701, 'epoch': 3} {'type': 'loss', 'content': 0.05688781291246414, 'timestamp': '2025-10-01 04:39:47.146327', 'step': 21702, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:47.178241', 'step': 21702, 'epoch': 3} {'type': 'loss', 'content': 0.1192435771226883, 'timestamp': '2025-10-01 04:39:47.180545', 'step': 21703, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:47.210925', 'step': 21703, 'epoch': 3} {'type': 'loss', 'content': 0.07289064675569534, 'timestamp': '2025-10-01 04:39:47.235481', 'step': 21704, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:47.265938', 'step': 21704, 'epoch': 3} {'type': 'loss', 'content': 0.05429830029606819, 'timestamp': '2025-10-01 04:39:47.268191', 'step': 21705, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:47.300484', 'step': 21705, 'epoch': 3} {'type': 'loss', 'content': 0.05072551593184471, 'timestamp': '2025-10-01 04:39:47.303030', 'step': 21706, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:47.336968', 'step': 21706, 'epoch': 3} {'type': 'loss', 'content': 0.11876997351646423, 'timestamp': '2025-10-01 04:39:47.339281', 'step': 21707, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:47.370844', 'step': 21707, 'epoch': 3} {'type': 'loss', 'content': 0.0748925432562828, 'timestamp': '2025-10-01 04:39:47.394399', 'step': 21708, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:47.424850', 'step': 21708, 'epoch': 3} {'type': 'loss', 'content': 0.06446918100118637, 'timestamp': '2025-10-01 04:39:47.427238', 'step': 21709, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:47.458280', 'step': 21709, 'epoch': 3} {'type': 'loss', 'content': 0.0712568536400795, 'timestamp': '2025-10-01 04:39:47.460459', 'step': 21710, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:47.491034', 'step': 21710, 'epoch': 3} {'type': 'loss', 'content': 0.09801258891820908, 'timestamp': '2025-10-01 04:39:47.493418', 'step': 21711, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:47.524213', 'step': 21711, 'epoch': 3} {'type': 'loss', 'content': 0.04032014310359955, 'timestamp': '2025-10-01 04:39:47.548363', 'step': 21712, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:39:47.579528', 'step': 21712, 'epoch': 3} {'type': 'loss', 'content': 0.0625513419508934, 'timestamp': '2025-10-01 04:39:47.581635', 'step': 21713, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:39:47.612032', 'step': 21713, 'epoch': 3} {'type': 'loss', 'content': 0.0730104148387909, 'timestamp': '2025-10-01 04:39:47.614293', 'step': 21714, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:47.646393', 'step': 21714, 'epoch': 3} {'type': 'loss', 'content': 0.032708942890167236, 'timestamp': '2025-10-01 04:39:47.648854', 'step': 21715, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:47.680869', 'step': 21715, 'epoch': 3} {'type': 'loss', 'content': 0.07713158428668976, 'timestamp': '2025-10-01 04:39:47.705685', 'step': 21716, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:47.736774', 'step': 21716, 'epoch': 3} {'type': 'loss', 'content': 0.030574258416891098, 'timestamp': '2025-10-01 04:39:47.739230', 'step': 21717, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:47.770318', 'step': 21717, 'epoch': 3} {'type': 'loss', 'content': 0.10604298114776611, 'timestamp': '2025-10-01 04:39:47.772568', 'step': 21718, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:47.802961', 'step': 21718, 'epoch': 3} {'type': 'loss', 'content': 0.06680713593959808, 'timestamp': '2025-10-01 04:39:47.805202', 'step': 21719, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:47.835347', 'step': 21719, 'epoch': 3} {'type': 'loss', 'content': 0.11102139949798584, 'timestamp': '2025-10-01 04:39:47.859753', 'step': 21720, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:47.889900', 'step': 21720, 'epoch': 3} {'type': 'loss', 'content': 0.12141622602939606, 'timestamp': '2025-10-01 04:39:47.892186', 'step': 21721, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:47.922801', 'step': 21721, 'epoch': 3} {'type': 'loss', 'content': 0.12795430421829224, 'timestamp': '2025-10-01 04:39:47.924859', 'step': 21722, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:47.955146', 'step': 21722, 'epoch': 3} {'type': 'loss', 'content': 0.12136193364858627, 'timestamp': '2025-10-01 04:39:47.957241', 'step': 21723, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:47.988842', 'step': 21723, 'epoch': 3} {'type': 'loss', 'content': 0.06140851974487305, 'timestamp': '2025-10-01 04:39:48.012482', 'step': 21724, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:48.044104', 'step': 21724, 'epoch': 3} {'type': 'loss', 'content': 0.0950174406170845, 'timestamp': '2025-10-01 04:39:48.046286', 'step': 21725, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:48.076888', 'step': 21725, 'epoch': 3} {'type': 'loss', 'content': 0.0534152090549469, 'timestamp': '2025-10-01 04:39:48.078999', 'step': 21726, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:48.110679', 'step': 21726, 'epoch': 3} {'type': 'loss', 'content': 0.02840566448867321, 'timestamp': '2025-10-01 04:39:48.113070', 'step': 21727, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:48.146523', 'step': 21727, 'epoch': 3} {'type': 'loss', 'content': 0.030997727066278458, 'timestamp': '2025-10-01 04:39:48.170612', 'step': 21728, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:48.203842', 'step': 21728, 'epoch': 3} {'type': 'loss', 'content': 0.06995471566915512, 'timestamp': '2025-10-01 04:39:48.206062', 'step': 21729, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:48.236180', 'step': 21729, 'epoch': 3} {'type': 'loss', 'content': 0.07662340998649597, 'timestamp': '2025-10-01 04:39:48.238615', 'step': 21730, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:48.268892', 'step': 21730, 'epoch': 3} {'type': 'loss', 'content': 0.024038994684815407, 'timestamp': '2025-10-01 04:39:48.271004', 'step': 21731, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:48.303400', 'step': 21731, 'epoch': 3} {'type': 'loss', 'content': 0.11782647669315338, 'timestamp': '2025-10-01 04:39:48.327016', 'step': 21732, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:48.357685', 'step': 21732, 'epoch': 3} {'type': 'loss', 'content': 0.02655654214322567, 'timestamp': '2025-10-01 04:39:48.360375', 'step': 21733, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:48.391098', 'step': 21733, 'epoch': 3} {'type': 'loss', 'content': 0.015469580888748169, 'timestamp': '2025-10-01 04:39:48.393374', 'step': 21734, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:48.433220', 'step': 21734, 'epoch': 3} {'type': 'loss', 'content': 0.034476056694984436, 'timestamp': '2025-10-01 04:39:48.435399', 'step': 21735, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:48.465913', 'step': 21735, 'epoch': 3} {'type': 'loss', 'content': 0.057994239032268524, 'timestamp': '2025-10-01 04:39:48.489640', 'step': 21736, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:48.520101', 'step': 21736, 'epoch': 3} {'type': 'loss', 'content': 0.05982606112957001, 'timestamp': '2025-10-01 04:39:48.523750', 'step': 21737, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:48.567680', 'step': 21737, 'epoch': 3} {'type': 'loss', 'content': 0.04152626916766167, 'timestamp': '2025-10-01 04:39:48.576539', 'step': 21738, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:48.608790', 'step': 21738, 'epoch': 3} {'type': 'loss', 'content': 0.051615867763757706, 'timestamp': '2025-10-01 04:39:48.611619', 'step': 21739, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:48.644066', 'step': 21739, 'epoch': 3} {'type': 'loss', 'content': 0.041172903031110764, 'timestamp': '2025-10-01 04:39:48.668528', 'step': 21740, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:48.698926', 'step': 21740, 'epoch': 3} {'type': 'loss', 'content': 0.08827614784240723, 'timestamp': '2025-10-01 04:39:48.701865', 'step': 21741, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:48.732063', 'step': 21741, 'epoch': 3} {'type': 'loss', 'content': 0.08534253388643265, 'timestamp': '2025-10-01 04:39:48.734606', 'step': 21742, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:39:48.765595', 'step': 21742, 'epoch': 3} {'type': 'loss', 'content': 0.06618800759315491, 'timestamp': '2025-10-01 04:39:48.768310', 'step': 21743, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:39:48.800131', 'step': 21743, 'epoch': 3} {'type': 'loss', 'content': 0.08851920813322067, 'timestamp': '2025-10-01 04:39:48.823674', 'step': 21744, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:48.853875', 'step': 21744, 'epoch': 3} {'type': 'loss', 'content': 0.059352703392505646, 'timestamp': '2025-10-01 04:39:48.856161', 'step': 21745, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:48.886607', 'step': 21745, 'epoch': 3} {'type': 'loss', 'content': 0.12199154496192932, 'timestamp': '2025-10-01 04:39:48.889276', 'step': 21746, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:48.920189', 'step': 21746, 'epoch': 3} {'type': 'loss', 'content': 0.09623787552118301, 'timestamp': '2025-10-01 04:39:48.922297', 'step': 21747, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:39:48.953850', 'step': 21747, 'epoch': 3} {'type': 'loss', 'content': 0.05750323086977005, 'timestamp': '2025-10-01 04:39:48.978688', 'step': 21748, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:49.011240', 'step': 21748, 'epoch': 3} {'type': 'loss', 'content': 0.060763370245695114, 'timestamp': '2025-10-01 04:39:49.014902', 'step': 21749, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:39:49.046679', 'step': 21749, 'epoch': 3} {'type': 'loss', 'content': 0.10008161514997482, 'timestamp': '2025-10-01 04:39:49.049610', 'step': 21750, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:49.080692', 'step': 21750, 'epoch': 3} {'type': 'loss', 'content': 0.09323382377624512, 'timestamp': '2025-10-01 04:39:49.082916', 'step': 21751, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:49.115886', 'step': 21751, 'epoch': 3} {'type': 'loss', 'content': 0.042137276381254196, 'timestamp': '2025-10-01 04:39:49.144046', 'step': 21752, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:49.176161', 'step': 21752, 'epoch': 3} {'type': 'loss', 'content': 0.08008307218551636, 'timestamp': '2025-10-01 04:39:49.178913', 'step': 21753, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:39:49.211252', 'step': 21753, 'epoch': 3} {'type': 'loss', 'content': 0.05215846002101898, 'timestamp': '2025-10-01 04:39:49.213632', 'step': 21754, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:49.245237', 'step': 21754, 'epoch': 3} {'type': 'loss', 'content': 0.06078954041004181, 'timestamp': '2025-10-01 04:39:49.247780', 'step': 21755, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:49.281697', 'step': 21755, 'epoch': 3} {'type': 'loss', 'content': 0.06925135850906372, 'timestamp': '2025-10-01 04:39:49.305459', 'step': 21756, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:39:49.341198', 'step': 21756, 'epoch': 3} {'type': 'loss', 'content': 0.07044341415166855, 'timestamp': '2025-10-01 04:39:49.344166', 'step': 21757, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:49.376000', 'step': 21757, 'epoch': 3} {'type': 'loss', 'content': 0.15038667619228363, 'timestamp': '2025-10-01 04:39:49.378218', 'step': 21758, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:39:49.413679', 'step': 21758, 'epoch': 3} {'type': 'loss', 'content': 0.08247511833906174, 'timestamp': '2025-10-01 04:39:49.416331', 'step': 21759, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:49.447930', 'step': 21759, 'epoch': 3} {'type': 'loss', 'content': 0.04297030717134476, 'timestamp': '2025-10-01 04:39:49.471584', 'step': 21760, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:39:49.505116', 'step': 21760, 'epoch': 3} {'type': 'loss', 'content': 0.1163853108882904, 'timestamp': '2025-10-01 04:39:49.510677', 'step': 21761, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:49.542162', 'step': 21761, 'epoch': 3} {'type': 'loss', 'content': 0.02944858931005001, 'timestamp': '2025-10-01 04:39:49.544264', 'step': 21762, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:49.577066', 'step': 21762, 'epoch': 3} {'type': 'loss', 'content': 0.06052624434232712, 'timestamp': '2025-10-01 04:39:49.579515', 'step': 21763, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:49.611725', 'step': 21763, 'epoch': 3} {'type': 'loss', 'content': 0.057490572333335876, 'timestamp': '2025-10-01 04:39:49.635437', 'step': 21764, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:49.667944', 'step': 21764, 'epoch': 3} {'type': 'loss', 'content': 0.07954230904579163, 'timestamp': '2025-10-01 04:39:49.670878', 'step': 21765, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:49.701705', 'step': 21765, 'epoch': 3} {'type': 'loss', 'content': 0.04731200635433197, 'timestamp': '2025-10-01 04:39:49.703839', 'step': 21766, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:49.735587', 'step': 21766, 'epoch': 3} {'type': 'loss', 'content': 0.04378693550825119, 'timestamp': '2025-10-01 04:39:49.738957', 'step': 21767, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:49.769527', 'step': 21767, 'epoch': 3} {'type': 'loss', 'content': 0.07329745590686798, 'timestamp': '2025-10-01 04:39:49.794669', 'step': 21768, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:49.826072', 'step': 21768, 'epoch': 3} {'type': 'loss', 'content': 0.059612520039081573, 'timestamp': '2025-10-01 04:39:49.828323', 'step': 21769, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:49.860465', 'step': 21769, 'epoch': 3} {'type': 'loss', 'content': 0.05126611143350601, 'timestamp': '2025-10-01 04:39:49.863689', 'step': 21770, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:49.895799', 'step': 21770, 'epoch': 3} {'type': 'loss', 'content': 0.020743342116475105, 'timestamp': '2025-10-01 04:39:49.898546', 'step': 21771, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:49.929015', 'step': 21771, 'epoch': 3} {'type': 'loss', 'content': 0.02342616394162178, 'timestamp': '2025-10-01 04:39:49.952748', 'step': 21772, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:49.989535', 'step': 21772, 'epoch': 3} {'type': 'loss', 'content': 0.043838534504175186, 'timestamp': '2025-10-01 04:39:49.991907', 'step': 21773, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:39:50.023561', 'step': 21773, 'epoch': 3} {'type': 'loss', 'content': 0.05784599855542183, 'timestamp': '2025-10-01 04:39:50.032538', 'step': 21774, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:50.063493', 'step': 21774, 'epoch': 3} {'type': 'loss', 'content': 0.053673166781663895, 'timestamp': '2025-10-01 04:39:50.068817', 'step': 21775, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:50.099682', 'step': 21775, 'epoch': 3} {'type': 'loss', 'content': 0.044187724590301514, 'timestamp': '2025-10-01 04:39:50.123728', 'step': 21776, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:50.162313', 'step': 21776, 'epoch': 3} {'type': 'loss', 'content': 0.017145652323961258, 'timestamp': '2025-10-01 04:39:50.164917', 'step': 21777, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:50.195598', 'step': 21777, 'epoch': 3} {'type': 'loss', 'content': 0.07096874713897705, 'timestamp': '2025-10-01 04:39:50.197935', 'step': 21778, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:39:50.228707', 'step': 21778, 'epoch': 3} {'type': 'loss', 'content': 0.07318882644176483, 'timestamp': '2025-10-01 04:39:50.230886', 'step': 21779, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:50.262155', 'step': 21779, 'epoch': 3} {'type': 'loss', 'content': 0.05745382979512215, 'timestamp': '2025-10-01 04:39:50.286794', 'step': 21780, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:50.319710', 'step': 21780, 'epoch': 3} {'type': 'loss', 'content': 0.07080616056919098, 'timestamp': '2025-10-01 04:39:50.321757', 'step': 21781, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:50.354389', 'step': 21781, 'epoch': 3} {'type': 'loss', 'content': 0.04678128659725189, 'timestamp': '2025-10-01 04:39:50.356642', 'step': 21782, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:50.387460', 'step': 21782, 'epoch': 3} {'type': 'loss', 'content': 0.11735539138317108, 'timestamp': '2025-10-01 04:39:50.389887', 'step': 21783, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:50.426730', 'step': 21783, 'epoch': 3} {'type': 'loss', 'content': 0.11442367732524872, 'timestamp': '2025-10-01 04:39:50.450251', 'step': 21784, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:50.481426', 'step': 21784, 'epoch': 3} {'type': 'loss', 'content': 0.03484027087688446, 'timestamp': '2025-10-01 04:39:50.483607', 'step': 21785, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:50.514618', 'step': 21785, 'epoch': 3} {'type': 'loss', 'content': 0.058222681283950806, 'timestamp': '2025-10-01 04:39:50.517908', 'step': 21786, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:50.555660', 'step': 21786, 'epoch': 3} {'type': 'loss', 'content': 0.0843135416507721, 'timestamp': '2025-10-01 04:39:50.559402', 'step': 21787, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:50.592779', 'step': 21787, 'epoch': 3} {'type': 'loss', 'content': 0.062216952443122864, 'timestamp': '2025-10-01 04:39:50.616644', 'step': 21788, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:50.649890', 'step': 21788, 'epoch': 3} {'type': 'loss', 'content': 0.08308247476816177, 'timestamp': '2025-10-01 04:39:50.652691', 'step': 21789, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:50.688540', 'step': 21789, 'epoch': 3} {'type': 'loss', 'content': 0.02426474168896675, 'timestamp': '2025-10-01 04:39:50.691134', 'step': 21790, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:50.724903', 'step': 21790, 'epoch': 3} {'type': 'loss', 'content': 0.09688328951597214, 'timestamp': '2025-10-01 04:39:50.727540', 'step': 21791, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:50.758796', 'step': 21791, 'epoch': 3} {'type': 'loss', 'content': 0.11117777228355408, 'timestamp': '2025-10-01 04:39:50.782686', 'step': 21792, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:50.813389', 'step': 21792, 'epoch': 3} {'type': 'loss', 'content': 0.10409349948167801, 'timestamp': '2025-10-01 04:39:50.815652', 'step': 21793, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:39:50.847035', 'step': 21793, 'epoch': 3} {'type': 'loss', 'content': 0.08199141919612885, 'timestamp': '2025-10-01 04:39:50.849642', 'step': 21794, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:50.879966', 'step': 21794, 'epoch': 3} {'type': 'loss', 'content': 0.1046927347779274, 'timestamp': '2025-10-01 04:39:50.882432', 'step': 21795, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:39:50.912926', 'step': 21795, 'epoch': 3} {'type': 'loss', 'content': 0.043238915503025055, 'timestamp': '2025-10-01 04:39:50.937203', 'step': 21796, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:50.969014', 'step': 21796, 'epoch': 3} {'type': 'loss', 'content': 0.08957688510417938, 'timestamp': '2025-10-01 04:39:50.971332', 'step': 21797, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:51.002846', 'step': 21797, 'epoch': 3} {'type': 'loss', 'content': 0.04940802603960037, 'timestamp': '2025-10-01 04:39:51.006110', 'step': 21798, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:51.036470', 'step': 21798, 'epoch': 3} {'type': 'loss', 'content': 0.04913340136408806, 'timestamp': '2025-10-01 04:39:51.038788', 'step': 21799, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:51.069307', 'step': 21799, 'epoch': 3} {'type': 'loss', 'content': 0.04480334371328354, 'timestamp': '2025-10-01 04:39:51.093573', 'step': 21800, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:39:51.124612', 'step': 21800, 'epoch': 3} {'type': 'loss', 'content': 0.0485517643392086, 'timestamp': '2025-10-01 04:39:51.128645', 'step': 21801, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:51.158942', 'step': 21801, 'epoch': 3} {'type': 'loss', 'content': 0.11117450147867203, 'timestamp': '2025-10-01 04:39:51.161740', 'step': 21802, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:51.191846', 'step': 21802, 'epoch': 3} {'type': 'loss', 'content': 0.03197106719017029, 'timestamp': '2025-10-01 04:39:51.194116', 'step': 21803, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:39:51.224748', 'step': 21803, 'epoch': 3} {'type': 'loss', 'content': 0.09329088032245636, 'timestamp': '2025-10-01 04:39:51.248851', 'step': 21804, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:51.279863', 'step': 21804, 'epoch': 3} {'type': 'loss', 'content': 0.07863609492778778, 'timestamp': '2025-10-01 04:39:51.282162', 'step': 21805, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:51.312630', 'step': 21805, 'epoch': 3} {'type': 'loss', 'content': 0.079895980656147, 'timestamp': '2025-10-01 04:39:51.315208', 'step': 21806, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:51.345681', 'step': 21806, 'epoch': 3} {'type': 'loss', 'content': 0.017696011811494827, 'timestamp': '2025-10-01 04:39:51.349597', 'step': 21807, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:39:51.383403', 'step': 21807, 'epoch': 3} {'type': 'loss', 'content': 0.049698323011398315, 'timestamp': '2025-10-01 04:39:51.408746', 'step': 21808, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:51.439204', 'step': 21808, 'epoch': 3} {'type': 'loss', 'content': 0.031535372138023376, 'timestamp': '2025-10-01 04:39:51.441473', 'step': 21809, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:51.472131', 'step': 21809, 'epoch': 3} {'type': 'loss', 'content': 0.02890406735241413, 'timestamp': '2025-10-01 04:39:51.474769', 'step': 21810, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:51.506558', 'step': 21810, 'epoch': 3} {'type': 'loss', 'content': 0.019090067595243454, 'timestamp': '2025-10-01 04:39:51.509289', 'step': 21811, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:51.540114', 'step': 21811, 'epoch': 3} {'type': 'loss', 'content': 0.054647061973810196, 'timestamp': '2025-10-01 04:39:51.564323', 'step': 21812, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:51.594912', 'step': 21812, 'epoch': 3} {'type': 'loss', 'content': 0.1406134068965912, 'timestamp': '2025-10-01 04:39:51.597732', 'step': 21813, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:51.628605', 'step': 21813, 'epoch': 3} {'type': 'loss', 'content': 0.047628700733184814, 'timestamp': '2025-10-01 04:39:51.632327', 'step': 21814, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:51.663074', 'step': 21814, 'epoch': 3} {'type': 'loss', 'content': 0.06078809127211571, 'timestamp': '2025-10-01 04:39:51.665961', 'step': 21815, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:51.696987', 'step': 21815, 'epoch': 3} {'type': 'loss', 'content': 0.06652506440877914, 'timestamp': '2025-10-01 04:39:51.720964', 'step': 21816, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:51.753056', 'step': 21816, 'epoch': 3} {'type': 'loss', 'content': 0.09383588284254074, 'timestamp': '2025-10-01 04:39:51.755642', 'step': 21817, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:51.786918', 'step': 21817, 'epoch': 3} {'type': 'loss', 'content': 0.09101559966802597, 'timestamp': '2025-10-01 04:39:51.789419', 'step': 21818, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:51.819907', 'step': 21818, 'epoch': 3} {'type': 'loss', 'content': 0.07396754622459412, 'timestamp': '2025-10-01 04:39:51.822403', 'step': 21819, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:39:51.854503', 'step': 21819, 'epoch': 3} {'type': 'loss', 'content': 0.0314200259745121, 'timestamp': '2025-10-01 04:39:51.878330', 'step': 21820, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:39:51.908670', 'step': 21820, 'epoch': 3} {'type': 'loss', 'content': 0.02220986597239971, 'timestamp': '2025-10-01 04:39:51.911081', 'step': 21821, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:51.941978', 'step': 21821, 'epoch': 3} {'type': 'loss', 'content': 0.1478566825389862, 'timestamp': '2025-10-01 04:39:51.961163', 'step': 21822, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:39:51.991688', 'step': 21822, 'epoch': 3} {'type': 'loss', 'content': 0.09180810302495956, 'timestamp': '2025-10-01 04:39:51.994458', 'step': 21823, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:52.025315', 'step': 21823, 'epoch': 3} {'type': 'loss', 'content': 0.022879326716065407, 'timestamp': '2025-10-01 04:39:52.049246', 'step': 21824, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:52.079892', 'step': 21824, 'epoch': 3} {'type': 'loss', 'content': 0.08983615785837173, 'timestamp': '2025-10-01 04:39:52.083947', 'step': 21825, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:52.114980', 'step': 21825, 'epoch': 3} {'type': 'loss', 'content': 0.10098163038492203, 'timestamp': '2025-10-01 04:39:52.117430', 'step': 21826, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:39:52.148264', 'step': 21826, 'epoch': 3} {'type': 'loss', 'content': 0.06437996029853821, 'timestamp': '2025-10-01 04:39:52.151065', 'step': 21827, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:52.181501', 'step': 21827, 'epoch': 3} {'type': 'loss', 'content': 0.03999925032258034, 'timestamp': '2025-10-01 04:39:52.205263', 'step': 21828, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:52.236598', 'step': 21828, 'epoch': 3} {'type': 'loss', 'content': 0.0382707454264164, 'timestamp': '2025-10-01 04:39:52.239132', 'step': 21829, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:52.271553', 'step': 21829, 'epoch': 3} {'type': 'loss', 'content': 0.1187216117978096, 'timestamp': '2025-10-01 04:39:52.273701', 'step': 21830, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:52.304903', 'step': 21830, 'epoch': 3} {'type': 'loss', 'content': 0.04163350909948349, 'timestamp': '2025-10-01 04:39:52.307735', 'step': 21831, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:52.337926', 'step': 21831, 'epoch': 3} {'type': 'loss', 'content': 0.05393943935632706, 'timestamp': '2025-10-01 04:39:52.361510', 'step': 21832, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:52.392274', 'step': 21832, 'epoch': 3} {'type': 'loss', 'content': 0.031809452921152115, 'timestamp': '2025-10-01 04:39:52.394540', 'step': 21833, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:52.424908', 'step': 21833, 'epoch': 3} {'type': 'loss', 'content': 0.009959422051906586, 'timestamp': '2025-10-01 04:39:52.427243', 'step': 21834, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:52.457816', 'step': 21834, 'epoch': 3} {'type': 'loss', 'content': 0.09702400863170624, 'timestamp': '2025-10-01 04:39:52.460474', 'step': 21835, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:52.490379', 'step': 21835, 'epoch': 3} {'type': 'loss', 'content': 0.059034377336502075, 'timestamp': '2025-10-01 04:39:52.513937', 'step': 21836, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:39:52.544919', 'step': 21836, 'epoch': 3} {'type': 'loss', 'content': 0.04804389178752899, 'timestamp': '2025-10-01 04:39:52.546941', 'step': 21837, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:52.577316', 'step': 21837, 'epoch': 3} {'type': 'loss', 'content': 0.0206175297498703, 'timestamp': '2025-10-01 04:39:52.579718', 'step': 21838, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:39:52.610122', 'step': 21838, 'epoch': 3} {'type': 'loss', 'content': 0.08069812506437302, 'timestamp': '2025-10-01 04:39:52.612493', 'step': 21839, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:52.648266', 'step': 21839, 'epoch': 3} {'type': 'loss', 'content': 0.04092315956950188, 'timestamp': '2025-10-01 04:39:52.672391', 'step': 21840, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:52.702795', 'step': 21840, 'epoch': 3} {'type': 'loss', 'content': 0.009425169788300991, 'timestamp': '2025-10-01 04:39:52.704938', 'step': 21841, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:52.735610', 'step': 21841, 'epoch': 3} {'type': 'loss', 'content': 0.06455108523368835, 'timestamp': '2025-10-01 04:39:52.738102', 'step': 21842, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:52.769485', 'step': 21842, 'epoch': 3} {'type': 'loss', 'content': 0.07214085012674332, 'timestamp': '2025-10-01 04:39:52.772862', 'step': 21843, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:52.803029', 'step': 21843, 'epoch': 3} {'type': 'loss', 'content': 0.08323550224304199, 'timestamp': '2025-10-01 04:39:52.826656', 'step': 21844, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:52.864115', 'step': 21844, 'epoch': 3} {'type': 'loss', 'content': 0.07009730488061905, 'timestamp': '2025-10-01 04:39:52.866634', 'step': 21845, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:39:52.896974', 'step': 21845, 'epoch': 3} {'type': 'loss', 'content': 0.0483974851667881, 'timestamp': '2025-10-01 04:39:52.901270', 'step': 21846, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:52.932071', 'step': 21846, 'epoch': 3} {'type': 'loss', 'content': 0.06417058408260345, 'timestamp': '2025-10-01 04:39:52.934323', 'step': 21847, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:52.964757', 'step': 21847, 'epoch': 3} {'type': 'loss', 'content': 0.04399257153272629, 'timestamp': '2025-10-01 04:39:52.988248', 'step': 21848, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:53.019659', 'step': 21848, 'epoch': 3} {'type': 'loss', 'content': 0.10515706986188889, 'timestamp': '2025-10-01 04:39:53.021867', 'step': 21849, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:53.065496', 'step': 21849, 'epoch': 3} {'type': 'loss', 'content': 0.06887035816907883, 'timestamp': '2025-10-01 04:39:53.067895', 'step': 21850, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:53.102902', 'step': 21850, 'epoch': 3} {'type': 'loss', 'content': 0.0230711679905653, 'timestamp': '2025-10-01 04:39:53.105220', 'step': 21851, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:53.135238', 'step': 21851, 'epoch': 3} {'type': 'loss', 'content': 0.011023091152310371, 'timestamp': '2025-10-01 04:39:53.159051', 'step': 21852, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:39:53.189873', 'step': 21852, 'epoch': 3} {'type': 'loss', 'content': 0.033292751759290695, 'timestamp': '2025-10-01 04:39:53.197753', 'step': 21853, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:53.235203', 'step': 21853, 'epoch': 3} {'type': 'loss', 'content': 0.0504065565764904, 'timestamp': '2025-10-01 04:39:53.237639', 'step': 21854, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:53.268460', 'step': 21854, 'epoch': 3} {'type': 'loss', 'content': 0.03260397911071777, 'timestamp': '2025-10-01 04:39:53.270847', 'step': 21855, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:53.305883', 'step': 21855, 'epoch': 3} {'type': 'loss', 'content': 0.05648309364914894, 'timestamp': '2025-10-01 04:39:53.329634', 'step': 21856, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:53.363404', 'step': 21856, 'epoch': 3} {'type': 'loss', 'content': 0.02110280655324459, 'timestamp': '2025-10-01 04:39:53.365591', 'step': 21857, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:53.396018', 'step': 21857, 'epoch': 3} {'type': 'loss', 'content': 0.046167679131031036, 'timestamp': '2025-10-01 04:39:53.398234', 'step': 21858, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-10-01 04:39:53.429130', 'step': 21858, 'epoch': 3} {'type': 'loss', 'content': 0.01586567983031273, 'timestamp': '2025-10-01 04:39:53.433743', 'step': 21859, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:53.464795', 'step': 21859, 'epoch': 3} {'type': 'loss', 'content': 0.06272341310977936, 'timestamp': '2025-10-01 04:39:53.488826', 'step': 21860, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:53.519579', 'step': 21860, 'epoch': 3} {'type': 'loss', 'content': 0.08289424329996109, 'timestamp': '2025-10-01 04:39:53.523340', 'step': 21861, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:53.554367', 'step': 21861, 'epoch': 3} {'type': 'loss', 'content': 0.05052457004785538, 'timestamp': '2025-10-01 04:39:53.557183', 'step': 21862, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:53.590526', 'step': 21862, 'epoch': 3} {'type': 'loss', 'content': 0.08315654844045639, 'timestamp': '2025-10-01 04:39:53.592548', 'step': 21863, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:53.627412', 'step': 21863, 'epoch': 3} {'type': 'loss', 'content': 0.031759485602378845, 'timestamp': '2025-10-01 04:39:53.650979', 'step': 21864, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:53.685193', 'step': 21864, 'epoch': 3} {'type': 'loss', 'content': 0.02653607726097107, 'timestamp': '2025-10-01 04:39:53.687303', 'step': 21865, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:53.719658', 'step': 21865, 'epoch': 3} {'type': 'loss', 'content': 0.0479293130338192, 'timestamp': '2025-10-01 04:39:53.721879', 'step': 21866, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:53.757379', 'step': 21866, 'epoch': 3} {'type': 'loss', 'content': 0.03816116601228714, 'timestamp': '2025-10-01 04:39:53.759649', 'step': 21867, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:39:53.814685', 'step': 21867, 'epoch': 3} {'type': 'loss', 'content': 0.0058602322824299335, 'timestamp': '2025-10-01 04:39:53.838353', 'step': 21868, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:53.881763', 'step': 21868, 'epoch': 3} {'type': 'loss', 'content': 0.08023439347743988, 'timestamp': '2025-10-01 04:39:53.884200', 'step': 21869, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:39:53.921982', 'step': 21869, 'epoch': 3} {'type': 'loss', 'content': 0.0659024640917778, 'timestamp': '2025-10-01 04:39:53.924112', 'step': 21870, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:53.954840', 'step': 21870, 'epoch': 3} {'type': 'loss', 'content': 0.06285490840673447, 'timestamp': '2025-10-01 04:39:53.956768', 'step': 21871, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:53.988004', 'step': 21871, 'epoch': 3} {'type': 'loss', 'content': 0.03183313086628914, 'timestamp': '2025-10-01 04:39:54.011780', 'step': 21872, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:54.043295', 'step': 21872, 'epoch': 3} {'type': 'loss', 'content': 0.08364842087030411, 'timestamp': '2025-10-01 04:39:54.046586', 'step': 21873, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:54.077157', 'step': 21873, 'epoch': 3} {'type': 'loss', 'content': 0.05894272401928902, 'timestamp': '2025-10-01 04:39:54.079869', 'step': 21874, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:39:54.112617', 'step': 21874, 'epoch': 3} {'type': 'loss', 'content': 0.12531593441963196, 'timestamp': '2025-10-01 04:39:54.114939', 'step': 21875, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:54.145811', 'step': 21875, 'epoch': 3} {'type': 'loss', 'content': 0.025347745046019554, 'timestamp': '2025-10-01 04:39:54.169716', 'step': 21876, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:54.201469', 'step': 21876, 'epoch': 3} {'type': 'loss', 'content': 0.1822778433561325, 'timestamp': '2025-10-01 04:39:54.203753', 'step': 21877, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:54.235659', 'step': 21877, 'epoch': 3} {'type': 'loss', 'content': 0.049516305327415466, 'timestamp': '2025-10-01 04:39:54.237756', 'step': 21878, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:54.277781', 'step': 21878, 'epoch': 3} {'type': 'loss', 'content': 0.07023287564516068, 'timestamp': '2025-10-01 04:39:54.280028', 'step': 21879, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:39:54.312351', 'step': 21879, 'epoch': 3} {'type': 'loss', 'content': 0.06027865409851074, 'timestamp': '2025-10-01 04:39:54.336056', 'step': 21880, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:54.368847', 'step': 21880, 'epoch': 3} {'type': 'loss', 'content': 0.06462835520505905, 'timestamp': '2025-10-01 04:39:54.370951', 'step': 21881, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:54.410871', 'step': 21881, 'epoch': 3} {'type': 'loss', 'content': 0.07021839171648026, 'timestamp': '2025-10-01 04:39:54.412919', 'step': 21882, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:54.443665', 'step': 21882, 'epoch': 3} {'type': 'loss', 'content': 0.0799197182059288, 'timestamp': '2025-10-01 04:39:54.445934', 'step': 21883, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:54.478746', 'step': 21883, 'epoch': 3} {'type': 'loss', 'content': 0.0390029139816761, 'timestamp': '2025-10-01 04:39:54.502427', 'step': 21884, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:54.535131', 'step': 21884, 'epoch': 3} {'type': 'loss', 'content': 0.0888848826289177, 'timestamp': '2025-10-01 04:39:54.537265', 'step': 21885, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:54.571852', 'step': 21885, 'epoch': 3} {'type': 'loss', 'content': 0.01754298247396946, 'timestamp': '2025-10-01 04:39:54.573901', 'step': 21886, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:54.605035', 'step': 21886, 'epoch': 3} {'type': 'loss', 'content': 0.035584885627031326, 'timestamp': '2025-10-01 04:39:54.607076', 'step': 21887, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:54.646697', 'step': 21887, 'epoch': 3} {'type': 'loss', 'content': 0.06060335785150528, 'timestamp': '2025-10-01 04:39:54.670625', 'step': 21888, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:54.707099', 'step': 21888, 'epoch': 3} {'type': 'loss', 'content': 0.020754804834723473, 'timestamp': '2025-10-01 04:39:54.710599', 'step': 21889, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:54.745117', 'step': 21889, 'epoch': 3} {'type': 'loss', 'content': 0.06291093677282333, 'timestamp': '2025-10-01 04:39:54.747292', 'step': 21890, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:54.787128', 'step': 21890, 'epoch': 3} {'type': 'loss', 'content': 0.08393297344446182, 'timestamp': '2025-10-01 04:39:54.789302', 'step': 21891, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:54.825740', 'step': 21891, 'epoch': 3} {'type': 'loss', 'content': 0.09446625411510468, 'timestamp': '2025-10-01 04:39:54.849538', 'step': 21892, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:39:54.884349', 'step': 21892, 'epoch': 3} {'type': 'loss', 'content': 0.08475427329540253, 'timestamp': '2025-10-01 04:39:54.886521', 'step': 21893, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:54.924835', 'step': 21893, 'epoch': 3} {'type': 'loss', 'content': 0.0815761387348175, 'timestamp': '2025-10-01 04:39:54.926951', 'step': 21894, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:39:54.957414', 'step': 21894, 'epoch': 3} {'type': 'loss', 'content': 0.07920033484697342, 'timestamp': '2025-10-01 04:39:54.959550', 'step': 21895, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:54.990877', 'step': 21895, 'epoch': 3} {'type': 'loss', 'content': 0.04246256500482559, 'timestamp': '2025-10-01 04:39:55.014644', 'step': 21896, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:55.047109', 'step': 21896, 'epoch': 3} {'type': 'loss', 'content': 0.08156128972768784, 'timestamp': '2025-10-01 04:39:55.049288', 'step': 21897, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:55.081339', 'step': 21897, 'epoch': 3} {'type': 'loss', 'content': 0.12444238364696503, 'timestamp': '2025-10-01 04:39:55.083888', 'step': 21898, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:39:55.115455', 'step': 21898, 'epoch': 3} {'type': 'loss', 'content': 0.05193644389510155, 'timestamp': '2025-10-01 04:39:55.117593', 'step': 21899, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:55.149605', 'step': 21899, 'epoch': 3} {'type': 'loss', 'content': 0.08194766193628311, 'timestamp': '2025-10-01 04:39:55.173180', 'step': 21900, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:55.204177', 'step': 21900, 'epoch': 3} {'type': 'loss', 'content': 0.07365715503692627, 'timestamp': '2025-10-01 04:39:55.206200', 'step': 21901, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:39:55.244284', 'step': 21901, 'epoch': 3} {'type': 'loss', 'content': 0.01721435971558094, 'timestamp': '2025-10-01 04:39:55.246455', 'step': 21902, 'epoch': 3} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 949202279808}], 'timestamp': '2025-10-01 04:40:05.269097', 'step': 21902, 'epoch': 3} {'type': 'pplx', 'content': 12297.799132792843, 'timestamp': '2025-10-01 04:40:05.273107', 'step': 21902, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:40:05.302410', 'step': 21902, 'epoch': 3} {'type': 'loss', 'content': 0.13552702963352203, 'timestamp': '2025-10-01 04:40:05.304282', 'step': 21903, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:40:05.334435', 'step': 21903, 'epoch': 3} {'type': 'loss', 'content': 0.017358768731355667, 'timestamp': '2025-10-01 04:40:05.358677', 'step': 21904, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:40:05.388670', 'step': 21904, 'epoch': 3} {'type': 'loss', 'content': 0.026134658604860306, 'timestamp': '2025-10-01 04:40:05.399743', 'step': 21905, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:40:05.431579', 'step': 21905, 'epoch': 3} {'type': 'loss', 'content': 0.05524464324116707, 'timestamp': '2025-10-01 04:40:05.434038', 'step': 21906, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:40:05.466292', 'step': 21906, 'epoch': 3} {'type': 'loss', 'content': 0.11237184703350067, 'timestamp': '2025-10-01 04:40:05.469699', 'step': 21907, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:40:05.501055', 'step': 21907, 'epoch': 3} {'type': 'loss', 'content': 0.05366796627640724, 'timestamp': '2025-10-01 04:40:05.524796', 'step': 21908, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:40:05.555855', 'step': 21908, 'epoch': 3} {'type': 'loss', 'content': 0.02094745635986328, 'timestamp': '2025-10-01 04:40:05.557896', 'step': 21909, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:40:05.588351', 'step': 21909, 'epoch': 3} {'type': 'loss', 'content': 0.03755439445376396, 'timestamp': '2025-10-01 04:40:05.590537', 'step': 21910, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:40:05.620473', 'step': 21910, 'epoch': 3} {'type': 'loss', 'content': 0.08128346502780914, 'timestamp': '2025-10-01 04:40:05.622767', 'step': 21911, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:40:05.653895', 'step': 21911, 'epoch': 3} {'type': 'loss', 'content': 0.049420829862356186, 'timestamp': '2025-10-01 04:40:05.678294', 'step': 21912, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:40:05.709843', 'step': 21912, 'epoch': 3} {'type': 'loss', 'content': 0.08364547044038773, 'timestamp': '2025-10-01 04:40:05.711991', 'step': 21913, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:40:05.741952', 'step': 21913, 'epoch': 3} {'type': 'loss', 'content': 0.02170001156628132, 'timestamp': '2025-10-01 04:40:05.745120', 'step': 21914, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:40:05.776368', 'step': 21914, 'epoch': 3} {'type': 'loss', 'content': 0.12249761074781418, 'timestamp': '2025-10-01 04:40:05.778349', 'step': 21915, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:40:05.809664', 'step': 21915, 'epoch': 3} {'type': 'loss', 'content': 0.05623463913798332, 'timestamp': '2025-10-01 04:40:05.836188', 'step': 21916, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:40:05.867117', 'step': 21916, 'epoch': 3} {'type': 'loss', 'content': 0.03725302591919899, 'timestamp': '2025-10-01 04:40:05.870043', 'step': 21917, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:40:05.900359', 'step': 21917, 'epoch': 3} {'type': 'loss', 'content': 0.096551813185215, 'timestamp': '2025-10-01 04:40:05.902499', 'step': 21918, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:40:05.933272', 'step': 21918, 'epoch': 3} {'type': 'loss', 'content': 0.055161625146865845, 'timestamp': '2025-10-01 04:40:05.942826', 'step': 21919, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:40:05.974997', 'step': 21919, 'epoch': 3} {'type': 'loss', 'content': 0.05114288255572319, 'timestamp': '2025-10-01 04:40:05.999183', 'step': 21920, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:40:06.030561', 'step': 21920, 'epoch': 3} {'type': 'loss', 'content': 0.015343849547207355, 'timestamp': '2025-10-01 04:40:06.032641', 'step': 21921, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:40:06.062387', 'step': 21921, 'epoch': 3} {'type': 'loss', 'content': 0.06029490381479263, 'timestamp': '2025-10-01 04:40:06.064690', 'step': 21922, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:40:06.095520', 'step': 21922, 'epoch': 3} {'type': 'loss', 'content': 0.07331573963165283, 'timestamp': '2025-10-01 04:40:06.098613', 'step': 21923, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:40:06.131458', 'step': 21923, 'epoch': 3} {'type': 'loss', 'content': 0.07924176007509232, 'timestamp': '2025-10-01 04:40:06.155196', 'step': 21924, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:40:06.184816', 'step': 21924, 'epoch': 3} {'type': 'loss', 'content': 0.02816474810242653, 'timestamp': '2025-10-01 04:40:06.187000', 'step': 21925, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:40:06.216711', 'step': 21925, 'epoch': 3} {'type': 'loss', 'content': 0.05585957318544388, 'timestamp': '2025-10-01 04:40:06.218863', 'step': 21926, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:40:06.248670', 'step': 21926, 'epoch': 3} {'type': 'loss', 'content': 0.028822600841522217, 'timestamp': '2025-10-01 04:40:06.250809', 'step': 21927, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:40:06.283072', 'step': 21927, 'epoch': 3} {'type': 'loss', 'content': 0.04254429042339325, 'timestamp': '2025-10-01 04:40:06.306655', 'step': 21928, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:40:06.336961', 'step': 21928, 'epoch': 3} {'type': 'loss', 'content': 0.09462589025497437, 'timestamp': '2025-10-01 04:40:06.339561', 'step': 21929, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:40:06.372133', 'step': 21929, 'epoch': 3} {'type': 'loss', 'content': 0.022924819961190224, 'timestamp': '2025-10-01 04:40:06.374360', 'step': 21930, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:40:06.404252', 'step': 21930, 'epoch': 3} {'type': 'loss', 'content': 0.03551095351576805, 'timestamp': '2025-10-01 04:40:06.406725', 'step': 21931, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:40:06.437318', 'step': 21931, 'epoch': 3} {'type': 'loss', 'content': 0.07278672605752945, 'timestamp': '2025-10-01 04:40:06.460747', 'step': 21932, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:40:06.490742', 'step': 21932, 'epoch': 3} {'type': 'loss', 'content': 0.09093174338340759, 'timestamp': '2025-10-01 04:40:06.492911', 'step': 21933, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:40:06.522812', 'step': 21933, 'epoch': 3} {'type': 'loss', 'content': 0.08009754866361618, 'timestamp': '2025-10-01 04:40:06.525544', 'step': 21934, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:40:06.557635', 'step': 21934, 'epoch': 3} {'type': 'loss', 'content': 0.08568458259105682, 'timestamp': '2025-10-01 04:40:06.560232', 'step': 21935, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:40:06.590122', 'step': 21935, 'epoch': 3} {'type': 'loss', 'content': 0.07967450469732285, 'timestamp': '2025-10-01 04:40:06.613787', 'step': 21936, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:40:06.645208', 'step': 21936, 'epoch': 3} {'type': 'loss', 'content': 0.10421694070100784, 'timestamp': '2025-10-01 04:40:06.647291', 'step': 21937, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:40:06.677810', 'step': 21937, 'epoch': 3} {'type': 'loss', 'content': 0.087726891040802, 'timestamp': '2025-10-01 04:40:06.679870', 'step': 21938, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:40:06.710904', 'step': 21938, 'epoch': 3} {'type': 'loss', 'content': 0.05400744825601578, 'timestamp': '2025-10-01 04:40:06.713262', 'step': 21939, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:40:06.743830', 'step': 21939, 'epoch': 3} {'type': 'loss', 'content': 0.009995815344154835, 'timestamp': '2025-10-01 04:40:06.773669', 'step': 21940, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:40:06.803895', 'step': 21940, 'epoch': 3} {'type': 'loss', 'content': 0.12919527292251587, 'timestamp': '2025-10-01 04:40:06.807368', 'step': 21941, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:40:06.838256', 'step': 21941, 'epoch': 3} {'type': 'loss', 'content': 0.05159013345837593, 'timestamp': '2025-10-01 04:40:06.840569', 'step': 21942, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:40:06.871976', 'step': 21942, 'epoch': 3} {'type': 'loss', 'content': 0.03560977801680565, 'timestamp': '2025-10-01 04:40:06.874461', 'step': 21943, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:40:06.905079', 'step': 21943, 'epoch': 3} {'type': 'loss', 'content': 0.015593498013913631, 'timestamp': '2025-10-01 04:40:06.928839', 'step': 21944, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:40:06.959949', 'step': 21944, 'epoch': 3} {'type': 'loss', 'content': 0.0896398276090622, 'timestamp': '2025-10-01 04:40:06.962143', 'step': 21945, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:40:06.993842', 'step': 21945, 'epoch': 3} {'type': 'loss', 'content': 0.03678402304649353, 'timestamp': '2025-10-01 04:40:06.996141', 'step': 21946, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:40:07.027108', 'step': 21946, 'epoch': 3} {'type': 'loss', 'content': 0.07087542861700058, 'timestamp': '2025-10-01 04:40:07.029420', 'step': 21947, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:40:07.059874', 'step': 21947, 'epoch': 3} {'type': 'loss', 'content': 0.03426026925444603, 'timestamp': '2025-10-01 04:40:07.083579', 'step': 21948, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:40:07.115531', 'step': 21948, 'epoch': 3} {'type': 'loss', 'content': 0.026663092896342278, 'timestamp': '2025-10-01 04:40:07.117786', 'step': 21949, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:40:07.149672', 'step': 21949, 'epoch': 3} {'type': 'loss', 'content': 0.07483414560556412, 'timestamp': '2025-10-01 04:40:07.151808', 'step': 21950, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:40:07.183215', 'step': 21950, 'epoch': 3} {'type': 'loss', 'content': 0.034905485808849335, 'timestamp': '2025-10-01 04:40:07.185448', 'step': 21951, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:40:07.218007', 'step': 21951, 'epoch': 3} {'type': 'loss', 'content': 0.011077445931732655, 'timestamp': '2025-10-01 04:40:07.241630', 'step': 21952, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:40:07.283456', 'step': 21952, 'epoch': 3} {'type': 'loss', 'content': 0.05491907522082329, 'timestamp': '2025-10-01 04:40:07.286009', 'step': 21953, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:40:07.318277', 'step': 21953, 'epoch': 3} {'type': 'loss', 'content': 0.06036510318517685, 'timestamp': '2025-10-01 04:40:07.320612', 'step': 21954, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:40:07.351044', 'step': 21954, 'epoch': 3} {'type': 'loss', 'content': 0.04258333519101143, 'timestamp': '2025-10-01 04:40:07.353151', 'step': 21955, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:40:07.383431', 'step': 21955, 'epoch': 3} {'type': 'loss', 'content': 0.03581903129816055, 'timestamp': '2025-10-01 04:40:07.407203', 'step': 21956, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:40:07.439522', 'step': 21956, 'epoch': 3} {'type': 'loss', 'content': 0.14460298418998718, 'timestamp': '2025-10-01 04:40:07.441597', 'step': 21957, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:40:07.480653', 'step': 21957, 'epoch': 3} {'type': 'loss', 'content': 0.027108386158943176, 'timestamp': '2025-10-01 04:40:07.483093', 'step': 21958, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:40:07.514880', 'step': 21958, 'epoch': 3} {'type': 'loss', 'content': 0.07671434432268143, 'timestamp': '2025-10-01 04:40:07.517404', 'step': 21959, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:40:07.548259', 'step': 21959, 'epoch': 3} {'type': 'loss', 'content': 0.06650027632713318, 'timestamp': '2025-10-01 04:40:07.571945', 'step': 21960, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:40:07.614586', 'step': 21960, 'epoch': 3} {'type': 'loss', 'content': 0.05924445018172264, 'timestamp': '2025-10-01 04:40:07.617659', 'step': 21961, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:40:07.649013', 'step': 21961, 'epoch': 3} {'type': 'loss', 'content': 0.053305964916944504, 'timestamp': '2025-10-01 04:40:07.651886', 'step': 21962, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:40:07.692796', 'step': 21962, 'epoch': 3} {'type': 'loss', 'content': 0.01468946784734726, 'timestamp': '2025-10-01 04:40:07.695318', 'step': 21963, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:40:07.725713', 'step': 21963, 'epoch': 3} {'type': 'loss', 'content': 0.05226048082113266, 'timestamp': '2025-10-01 04:40:07.749878', 'step': 21964, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:40:07.781650', 'step': 21964, 'epoch': 3} {'type': 'loss', 'content': 0.030482303351163864, 'timestamp': '2025-10-01 04:40:07.783919', 'step': 21965, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:40:07.814981', 'step': 21965, 'epoch': 3} {'type': 'loss', 'content': 0.04587460681796074, 'timestamp': '2025-10-01 04:40:07.817331', 'step': 21966, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:40:07.848369', 'step': 21966, 'epoch': 3} {'type': 'loss', 'content': 0.04299090802669525, 'timestamp': '2025-10-01 04:40:07.850547', 'step': 21967, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:40:07.882752', 'step': 21967, 'epoch': 3} {'type': 'loss', 'content': 0.0677478238940239, 'timestamp': '2025-10-01 04:40:07.906629', 'step': 21968, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:40:07.936866', 'step': 21968, 'epoch': 3} {'type': 'loss', 'content': 0.046027690172195435, 'timestamp': '2025-10-01 04:40:07.939310', 'step': 21969, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:40:07.970859', 'step': 21969, 'epoch': 3} {'type': 'loss', 'content': 0.00923006609082222, 'timestamp': '2025-10-01 04:40:07.973428', 'step': 21970, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:40:08.003634', 'step': 21970, 'epoch': 3} {'type': 'loss', 'content': 0.0942707434296608, 'timestamp': '2025-10-01 04:40:08.006478', 'step': 21971, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:40:08.038777', 'step': 21971, 'epoch': 3} {'type': 'loss', 'content': 0.06360206007957458, 'timestamp': '2025-10-01 04:40:08.062472', 'step': 21972, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:40:08.094025', 'step': 21972, 'epoch': 3} {'type': 'loss', 'content': 0.09229233115911484, 'timestamp': '2025-10-01 04:40:08.096308', 'step': 21973, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:40:08.128852', 'step': 21973, 'epoch': 3} {'type': 'loss', 'content': 0.037215784192085266, 'timestamp': '2025-10-01 04:40:08.131157', 'step': 21974, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:40:08.162437', 'step': 21974, 'epoch': 3} {'type': 'loss', 'content': 0.09290874749422073, 'timestamp': '2025-10-01 04:40:08.164695', 'step': 21975, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:40:08.195937', 'step': 21975, 'epoch': 3} {'type': 'loss', 'content': 0.025115419179201126, 'timestamp': '2025-10-01 04:40:08.219920', 'step': 21976, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:40:08.250224', 'step': 21976, 'epoch': 3} {'type': 'loss', 'content': 0.09623654186725616, 'timestamp': '2025-10-01 04:40:08.252182', 'step': 21977, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:40:08.282838', 'step': 21977, 'epoch': 3} {'type': 'loss', 'content': 0.09399783611297607, 'timestamp': '2025-10-01 04:40:08.285653', 'step': 21978, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:40:08.316023', 'step': 21978, 'epoch': 3} {'type': 'loss', 'content': 0.1076701357960701, 'timestamp': '2025-10-01 04:40:08.318894', 'step': 21979, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:40:08.349689', 'step': 21979, 'epoch': 3} {'type': 'loss', 'content': 0.17184223234653473, 'timestamp': '2025-10-01 04:40:08.373545', 'step': 21980, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:40:08.405165', 'step': 21980, 'epoch': 3} {'type': 'loss', 'content': 0.027822785079479218, 'timestamp': '2025-10-01 04:40:08.407295', 'step': 21981, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:40:08.438461', 'step': 21981, 'epoch': 3} {'type': 'loss', 'content': 0.10262425988912582, 'timestamp': '2025-10-01 04:40:08.447422', 'step': 21982, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:40:08.477431', 'step': 21982, 'epoch': 3} {'type': 'loss', 'content': 0.07520820200443268, 'timestamp': '2025-10-01 04:40:08.479404', 'step': 21983, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:40:08.511281', 'step': 21983, 'epoch': 3} {'type': 'loss', 'content': 0.07671131193637848, 'timestamp': '2025-10-01 04:40:08.534770', 'step': 21984, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:40:08.565756', 'step': 21984, 'epoch': 3} {'type': 'loss', 'content': 0.06093037873506546, 'timestamp': '2025-10-01 04:40:08.568485', 'step': 21985, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:40:08.598776', 'step': 21985, 'epoch': 3} {'type': 'loss', 'content': 0.1205848753452301, 'timestamp': '2025-10-01 04:40:08.600834', 'step': 21986, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:40:08.632741', 'step': 21986, 'epoch': 3} {'type': 'loss', 'content': 0.13798688352108002, 'timestamp': '2025-10-01 04:40:08.637254', 'step': 21987, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:40:08.670071', 'step': 21987, 'epoch': 3} {'type': 'loss', 'content': 0.00923468079417944, 'timestamp': '2025-10-01 04:40:08.709898', 'step': 21988, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:40:08.742194', 'step': 21988, 'epoch': 3} {'type': 'loss', 'content': 0.056074220687150955, 'timestamp': '2025-10-01 04:40:08.744855', 'step': 21989, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:40:08.775329', 'step': 21989, 'epoch': 3} {'type': 'loss', 'content': 0.0495452918112278, 'timestamp': '2025-10-01 04:40:08.777562', 'step': 21990, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:40:08.808176', 'step': 21990, 'epoch': 3} {'type': 'loss', 'content': 0.09166709333658218, 'timestamp': '2025-10-01 04:40:08.827139', 'step': 21991, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:40:08.857589', 'step': 21991, 'epoch': 3} {'type': 'loss', 'content': 0.08410773426294327, 'timestamp': '2025-10-01 04:40:08.881543', 'step': 21992, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:40:08.912258', 'step': 21992, 'epoch': 3} {'type': 'loss', 'content': 0.025965692475438118, 'timestamp': '2025-10-01 04:40:08.914895', 'step': 21993, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:40:08.945522', 'step': 21993, 'epoch': 3} {'type': 'loss', 'content': 0.10646593570709229, 'timestamp': '2025-10-01 04:40:08.948196', 'step': 21994, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:40:08.978195', 'step': 21994, 'epoch': 3} {'type': 'loss', 'content': 0.027918312698602676, 'timestamp': '2025-10-01 04:40:08.981001', 'step': 21995, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:40:09.011424', 'step': 21995, 'epoch': 3} {'type': 'loss', 'content': 0.0561717189848423, 'timestamp': '2025-10-01 04:40:09.035170', 'step': 21996, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:40:09.066017', 'step': 21996, 'epoch': 3} {'type': 'loss', 'content': 0.042855240404605865, 'timestamp': '2025-10-01 04:40:09.068556', 'step': 21997, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:40:09.099313', 'step': 21997, 'epoch': 3} {'type': 'loss', 'content': 0.09977222979068756, 'timestamp': '2025-10-01 04:40:09.101749', 'step': 21998, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:40:09.133146', 'step': 21998, 'epoch': 3} {'type': 'loss', 'content': 0.04305721074342728, 'timestamp': '2025-10-01 04:40:09.135632', 'step': 21999, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:40:09.166046', 'step': 21999, 'epoch': 3} {'type': 'loss', 'content': 0.0465458445250988, 'timestamp': '2025-10-01 04:40:09.190044', 'step': 22000, 'epoch': 3} {'type': 'info', 'content': 'Checkpoint saved at step 22000', 'timestamp': '2025-10-01 04:40:14.521157', 'step': 22000, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:40:14.553533', 'step': 22000, 'epoch': 3} {'type': 'loss', 'content': 0.10532360523939133, 'timestamp': '2025-10-01 04:40:14.555476', 'step': 22001, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:40:14.586822', 'step': 22001, 'epoch': 3} {'type': 'loss', 'content': 0.05327099189162254, 'timestamp': '2025-10-01 04:40:14.588758', 'step': 22002, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:40:14.619342', 'step': 22002, 'epoch': 3} {'type': 'loss', 'content': 0.06175354868173599, 'timestamp': '2025-10-01 04:40:14.621500', 'step': 22003, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:40:14.652311', 'step': 22003, 'epoch': 3} {'type': 'loss', 'content': 0.07508110255002975, 'timestamp': '2025-10-01 04:40:14.676688', 'step': 22004, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:40:14.706896', 'step': 22004, 'epoch': 3} {'type': 'loss', 'content': 0.11310688406229019, 'timestamp': '2025-10-01 04:40:14.708753', 'step': 22005, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:40:14.742709', 'step': 22005, 'epoch': 3} {'type': 'loss', 'content': 0.07356862723827362, 'timestamp': '2025-10-01 04:40:14.744454', 'step': 22006, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:40:14.774815', 'step': 22006, 'epoch': 3} {'type': 'loss', 'content': 0.09808000922203064, 'timestamp': '2025-10-01 04:40:14.777537', 'step': 22007, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:40:14.810993', 'step': 22007, 'epoch': 3} {'type': 'loss', 'content': 0.10536843538284302, 'timestamp': '2025-10-01 04:40:14.835267', 'step': 22008, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:40:14.869622', 'step': 22008, 'epoch': 3} {'type': 'loss', 'content': 0.06309852749109268, 'timestamp': '2025-10-01 04:40:14.874793', 'step': 22009, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:40:14.911266', 'step': 22009, 'epoch': 3} {'type': 'loss', 'content': 0.0013837292790412903, 'timestamp': '2025-10-01 04:40:14.924861', 'step': 22010, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:40:14.956583', 'step': 22010, 'epoch': 3} {'type': 'loss', 'content': 0.06065088510513306, 'timestamp': '2025-10-01 04:40:14.959419', 'step': 22011, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:40:14.992684', 'step': 22011, 'epoch': 3} {'type': 'loss', 'content': 0.03188005089759827, 'timestamp': '2025-10-01 04:40:15.019217', 'step': 22012, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:40:15.050695', 'step': 22012, 'epoch': 3} {'type': 'loss', 'content': 0.06628540903329849, 'timestamp': '2025-10-01 04:40:15.059466', 'step': 22013, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:40:15.093041', 'step': 22013, 'epoch': 3} {'type': 'loss', 'content': 0.0054990374483168125, 'timestamp': '2025-10-01 04:40:15.095247', 'step': 22014, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:40:15.127624', 'step': 22014, 'epoch': 3} {'type': 'loss', 'content': 0.08078484982252121, 'timestamp': '2025-10-01 04:40:15.134788', 'step': 22015, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:40:15.169229', 'step': 22015, 'epoch': 3} {'type': 'loss', 'content': 0.11750683188438416, 'timestamp': '2025-10-01 04:40:15.192851', 'step': 22016, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:40:15.233603', 'step': 22016, 'epoch': 3} {'type': 'loss', 'content': 0.0978708490729332, 'timestamp': '2025-10-01 04:40:15.236196', 'step': 22017, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:40:15.274899', 'step': 22017, 'epoch': 3} {'type': 'loss', 'content': 0.04277884587645531, 'timestamp': '2025-10-01 04:40:15.277833', 'step': 22018, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:40:15.314708', 'step': 22018, 'epoch': 3} {'type': 'loss', 'content': 0.04238330200314522, 'timestamp': '2025-10-01 04:40:15.316895', 'step': 22019, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:40:15.351228', 'step': 22019, 'epoch': 3} {'type': 'loss', 'content': 0.07090240716934204, 'timestamp': '2025-10-01 04:40:15.375068', 'step': 22020, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:40:15.407224', 'step': 22020, 'epoch': 3} {'type': 'loss', 'content': 0.10674580186605453, 'timestamp': '2025-10-01 04:40:15.409483', 'step': 22021, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:40:15.442293', 'step': 22021, 'epoch': 3} {'type': 'loss', 'content': 0.1361771821975708, 'timestamp': '2025-10-01 04:40:15.444419', 'step': 22022, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:40:15.476032', 'step': 22022, 'epoch': 3} {'type': 'loss', 'content': 0.08233288675546646, 'timestamp': '2025-10-01 04:40:15.478847', 'step': 22023, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:40:15.511850', 'step': 22023, 'epoch': 3} {'type': 'loss', 'content': 0.10886529088020325, 'timestamp': '2025-10-01 04:40:15.535684', 'step': 22024, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:40:15.566859', 'step': 22024, 'epoch': 3} {'type': 'loss', 'content': 0.06671825051307678, 'timestamp': '2025-10-01 04:40:15.568895', 'step': 22025, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:40:15.601598', 'step': 22025, 'epoch': 3} {'type': 'loss', 'content': 0.05057807266712189, 'timestamp': '2025-10-01 04:40:15.604877', 'step': 22026, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:40:15.645373', 'step': 22026, 'epoch': 3} {'type': 'loss', 'content': 0.018982432782649994, 'timestamp': '2025-10-01 04:40:15.653157', 'step': 22027, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:40:15.688399', 'step': 22027, 'epoch': 3} {'type': 'loss', 'content': 0.028205661103129387, 'timestamp': '2025-10-01 04:40:15.713837', 'step': 22028, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:40:15.747625', 'step': 22028, 'epoch': 3} {'type': 'loss', 'content': 0.030314020812511444, 'timestamp': '2025-10-01 04:40:15.749780', 'step': 22029, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:40:15.784928', 'step': 22029, 'epoch': 3} {'type': 'loss', 'content': 0.06486109644174576, 'timestamp': '2025-10-01 04:40:15.787697', 'step': 22030, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:40:15.821808', 'step': 22030, 'epoch': 3} {'type': 'loss', 'content': 0.06289704144001007, 'timestamp': '2025-10-01 04:40:15.824251', 'step': 22031, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:40:15.855742', 'step': 22031, 'epoch': 3} {'type': 'loss', 'content': 0.12399092316627502, 'timestamp': '2025-10-01 04:40:15.879339', 'step': 22032, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:40:15.929052', 'step': 22032, 'epoch': 3} {'type': 'loss', 'content': 0.06476674973964691, 'timestamp': '2025-10-01 04:40:15.931143', 'step': 22033, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:40:15.962824', 'step': 22033, 'epoch': 3} {'type': 'loss', 'content': 0.035226862877607346, 'timestamp': '2025-10-01 04:40:15.968638', 'step': 22034, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:40:16.010344', 'step': 22034, 'epoch': 3} {'type': 'loss', 'content': 0.0905984491109848, 'timestamp': '2025-10-01 04:40:16.012584', 'step': 22035, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:40:16.044603', 'step': 22035, 'epoch': 3} {'type': 'loss', 'content': 0.02281252294778824, 'timestamp': '2025-10-01 04:40:16.068426', 'step': 22036, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:40:16.100534', 'step': 22036, 'epoch': 3} {'type': 'loss', 'content': 0.07035891711711884, 'timestamp': '2025-10-01 04:40:16.103228', 'step': 22037, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:40:16.134140', 'step': 22037, 'epoch': 3} {'type': 'loss', 'content': 0.11762279272079468, 'timestamp': '2025-10-01 04:40:16.136111', 'step': 22038, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:40:16.167599', 'step': 22038, 'epoch': 3} {'type': 'loss', 'content': 0.0189395472407341, 'timestamp': '2025-10-01 04:40:16.170268', 'step': 22039, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:40:16.202245', 'step': 22039, 'epoch': 3} {'type': 'loss', 'content': 0.08005450665950775, 'timestamp': '2025-10-01 04:40:16.225861', 'step': 22040, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:40:16.264070', 'step': 22040, 'epoch': 3} {'type': 'loss', 'content': 0.1120053082704544, 'timestamp': '2025-10-01 04:40:16.266306', 'step': 22041, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:40:16.300560', 'step': 22041, 'epoch': 3} {'type': 'loss', 'content': 0.032350122928619385, 'timestamp': '2025-10-01 04:40:16.302862', 'step': 22042, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:40:16.333346', 'step': 22042, 'epoch': 3} {'type': 'loss', 'content': 0.0684848353266716, 'timestamp': '2025-10-01 04:40:16.335609', 'step': 22043, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:40:16.365805', 'step': 22043, 'epoch': 3} {'type': 'loss', 'content': 0.039023078978061676, 'timestamp': '2025-10-01 04:40:16.389417', 'step': 22044, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:40:16.422139', 'step': 22044, 'epoch': 3} {'type': 'loss', 'content': 0.03614871948957443, 'timestamp': '2025-10-01 04:40:16.424194', 'step': 22045, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:40:16.454802', 'step': 22045, 'epoch': 3} {'type': 'loss', 'content': 0.04105057194828987, 'timestamp': '2025-10-01 04:40:16.456979', 'step': 22046, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:40:16.492902', 'step': 22046, 'epoch': 3} {'type': 'loss', 'content': 0.034663934260606766, 'timestamp': '2025-10-01 04:40:16.495139', 'step': 22047, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:40:16.526325', 'step': 22047, 'epoch': 3} {'type': 'loss', 'content': 0.03977108746767044, 'timestamp': '2025-10-01 04:40:16.549916', 'step': 22048, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:40:16.581867', 'step': 22048, 'epoch': 3} {'type': 'loss', 'content': 0.03577766567468643, 'timestamp': '2025-10-01 04:40:16.583564', 'step': 22049, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:40:16.613883', 'step': 22049, 'epoch': 3} {'type': 'loss', 'content': 0.04793226718902588, 'timestamp': '2025-10-01 04:40:16.616024', 'step': 22050, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:40:16.654354', 'step': 22050, 'epoch': 3} {'type': 'loss', 'content': 0.04287511855363846, 'timestamp': '2025-10-01 04:40:16.656599', 'step': 22051, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:40:16.687080', 'step': 22051, 'epoch': 3} {'type': 'loss', 'content': 0.04135902225971222, 'timestamp': '2025-10-01 04:40:16.710850', 'step': 22052, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:40:16.741220', 'step': 22052, 'epoch': 3} {'type': 'loss', 'content': 0.06131772696971893, 'timestamp': '2025-10-01 04:40:16.747247', 'step': 22053, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:40:16.789289', 'step': 22053, 'epoch': 3} {'type': 'loss', 'content': 0.040629129856824875, 'timestamp': '2025-10-01 04:40:16.791320', 'step': 22054, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:40:16.821907', 'step': 22054, 'epoch': 3} {'type': 'loss', 'content': 0.05393122881650925, 'timestamp': '2025-10-01 04:40:16.824384', 'step': 22055, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:40:16.854684', 'step': 22055, 'epoch': 3} {'type': 'loss', 'content': 0.07079562544822693, 'timestamp': '2025-10-01 04:40:16.878324', 'step': 22056, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:40:16.908263', 'step': 22056, 'epoch': 3} {'type': 'loss', 'content': 0.0206220094114542, 'timestamp': '2025-10-01 04:40:16.910432', 'step': 22057, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:40:16.940536', 'step': 22057, 'epoch': 3} {'type': 'loss', 'content': 0.026644406840205193, 'timestamp': '2025-10-01 04:40:16.942878', 'step': 22058, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:40:16.972846', 'step': 22058, 'epoch': 3} {'type': 'loss', 'content': 0.042018283158540726, 'timestamp': '2025-10-01 04:40:16.974894', 'step': 22059, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:40:17.005586', 'step': 22059, 'epoch': 3} {'type': 'loss', 'content': 0.043922606855630875, 'timestamp': '2025-10-01 04:40:17.029285', 'step': 22060, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:40:17.059551', 'step': 22060, 'epoch': 3} {'type': 'loss', 'content': 0.04000856727361679, 'timestamp': '2025-10-01 04:40:17.073317', 'step': 22061, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:40:17.105317', 'step': 22061, 'epoch': 3} {'type': 'loss', 'content': 0.030742468312382698, 'timestamp': '2025-10-01 04:40:17.107461', 'step': 22062, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:40:17.138061', 'step': 22062, 'epoch': 3} {'type': 'loss', 'content': 0.04627837985754013, 'timestamp': '2025-10-01 04:40:17.140202', 'step': 22063, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:40:17.171724', 'step': 22063, 'epoch': 3} {'type': 'loss', 'content': 0.09452205151319504, 'timestamp': '2025-10-01 04:40:17.195552', 'step': 22064, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:40:17.226241', 'step': 22064, 'epoch': 3} {'type': 'loss', 'content': 0.02353641949594021, 'timestamp': '2025-10-01 04:40:17.228577', 'step': 22065, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:40:17.258894', 'step': 22065, 'epoch': 3} {'type': 'loss', 'content': 0.06575492769479752, 'timestamp': '2025-10-01 04:40:17.261260', 'step': 22066, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:40:17.298855', 'step': 22066, 'epoch': 3} {'type': 'loss', 'content': 0.03623243048787117, 'timestamp': '2025-10-01 04:40:17.301164', 'step': 22067, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:40:17.332872', 'step': 22067, 'epoch': 3} {'type': 'loss', 'content': 0.047931358218193054, 'timestamp': '2025-10-01 04:40:17.356630', 'step': 22068, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:40:17.387890', 'step': 22068, 'epoch': 3} {'type': 'loss', 'content': 0.09428548812866211, 'timestamp': '2025-10-01 04:40:17.390111', 'step': 22069, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:40:17.420142', 'step': 22069, 'epoch': 3} {'type': 'loss', 'content': 0.10201903432607651, 'timestamp': '2025-10-01 04:40:17.422303', 'step': 22070, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:40:17.458268', 'step': 22070, 'epoch': 3} {'type': 'loss', 'content': 0.02842983603477478, 'timestamp': '2025-10-01 04:40:17.460642', 'step': 22071, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:40:17.498397', 'step': 22071, 'epoch': 3} {'type': 'loss', 'content': 0.14327284693717957, 'timestamp': '2025-10-01 04:40:17.522294', 'step': 22072, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:40:17.552275', 'step': 22072, 'epoch': 3} {'type': 'loss', 'content': 0.06210947781801224, 'timestamp': '2025-10-01 04:40:17.556739', 'step': 22073, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:40:17.592125', 'step': 22073, 'epoch': 3} {'type': 'loss', 'content': 0.05606542155146599, 'timestamp': '2025-10-01 04:40:17.596673', 'step': 22074, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:40:17.626876', 'step': 22074, 'epoch': 3} {'type': 'loss', 'content': 0.04507647082209587, 'timestamp': '2025-10-01 04:40:17.632563', 'step': 22075, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:40:17.662696', 'step': 22075, 'epoch': 3} {'type': 'loss', 'content': 0.06455827504396439, 'timestamp': '2025-10-01 04:40:17.686440', 'step': 22076, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:40:17.718690', 'step': 22076, 'epoch': 3} {'type': 'loss', 'content': 0.041635822504758835, 'timestamp': '2025-10-01 04:40:17.720509', 'step': 22077, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-10-01 04:40:17.750602', 'step': 22077, 'epoch': 3} {'type': 'loss', 'content': 0.04397391527891159, 'timestamp': '2025-10-01 04:40:17.755360', 'step': 22078, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:40:17.786682', 'step': 22078, 'epoch': 3} {'type': 'loss', 'content': 0.10866294801235199, 'timestamp': '2025-10-01 04:40:17.788886', 'step': 22079, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:40:17.820854', 'step': 22079, 'epoch': 3} {'type': 'loss', 'content': 0.013156765140593052, 'timestamp': '2025-10-01 04:40:17.844891', 'step': 22080, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:40:17.877615', 'step': 22080, 'epoch': 3} {'type': 'loss', 'content': 0.09918267279863358, 'timestamp': '2025-10-01 04:40:17.879623', 'step': 22081, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:40:17.910420', 'step': 22081, 'epoch': 3} {'type': 'loss', 'content': 0.0925210639834404, 'timestamp': '2025-10-01 04:40:17.912689', 'step': 22082, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:40:17.942894', 'step': 22082, 'epoch': 3} {'type': 'loss', 'content': 0.08425256609916687, 'timestamp': '2025-10-01 04:40:17.946942', 'step': 22083, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:40:17.981462', 'step': 22083, 'epoch': 3} {'type': 'loss', 'content': 0.08561721444129944, 'timestamp': '2025-10-01 04:40:18.005246', 'step': 22084, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:40:18.035879', 'step': 22084, 'epoch': 3} {'type': 'loss', 'content': 0.05690634623169899, 'timestamp': '2025-10-01 04:40:18.038818', 'step': 22085, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:40:18.082692', 'step': 22085, 'epoch': 3} {'type': 'loss', 'content': 0.06173858046531677, 'timestamp': '2025-10-01 04:40:18.085067', 'step': 22086, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:40:18.114804', 'step': 22086, 'epoch': 3} {'type': 'loss', 'content': 0.1187557652592659, 'timestamp': '2025-10-01 04:40:18.116995', 'step': 22087, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:40:18.147383', 'step': 22087, 'epoch': 3} {'type': 'loss', 'content': 0.040186867117881775, 'timestamp': '2025-10-01 04:40:18.171278', 'step': 22088, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:40:18.207576', 'step': 22088, 'epoch': 3} {'type': 'loss', 'content': 0.08483582735061646, 'timestamp': '2025-10-01 04:40:18.212695', 'step': 22089, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:40:18.254945', 'step': 22089, 'epoch': 3} {'type': 'loss', 'content': 0.05456608906388283, 'timestamp': '2025-10-01 04:40:18.256926', 'step': 22090, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:40:18.286790', 'step': 22090, 'epoch': 3} {'type': 'loss', 'content': 0.0651424303650856, 'timestamp': '2025-10-01 04:40:18.288747', 'step': 22091, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:40:18.319024', 'step': 22091, 'epoch': 3} {'type': 'loss', 'content': 0.062274739146232605, 'timestamp': '2025-10-01 04:40:18.342449', 'step': 22092, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:40:18.373352', 'step': 22092, 'epoch': 3} {'type': 'loss', 'content': 0.014033874496817589, 'timestamp': '2025-10-01 04:40:18.375287', 'step': 22093, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:40:18.410274', 'step': 22093, 'epoch': 3} {'type': 'loss', 'content': 0.03300042822957039, 'timestamp': '2025-10-01 04:40:18.412091', 'step': 22094, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:40:18.442466', 'step': 22094, 'epoch': 3} {'type': 'loss', 'content': 0.04466859996318817, 'timestamp': '2025-10-01 04:40:18.444528', 'step': 22095, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:40:18.474464', 'step': 22095, 'epoch': 3} {'type': 'loss', 'content': 0.08550365269184113, 'timestamp': '2025-10-01 04:40:18.498490', 'step': 22096, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:40:18.528873', 'step': 22096, 'epoch': 3} {'type': 'loss', 'content': 0.13289347290992737, 'timestamp': '2025-10-01 04:40:18.530739', 'step': 22097, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:40:18.567504', 'step': 22097, 'epoch': 3} {'type': 'loss', 'content': 0.0655093863606453, 'timestamp': '2025-10-01 04:40:18.569507', 'step': 22098, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:40:18.599343', 'step': 22098, 'epoch': 3} {'type': 'loss', 'content': 0.03584779053926468, 'timestamp': '2025-10-01 04:40:18.601365', 'step': 22099, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:40:18.631285', 'step': 22099, 'epoch': 3} {'type': 'loss', 'content': 0.05304117873311043, 'timestamp': '2025-10-01 04:40:18.654769', 'step': 22100, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:40:18.684466', 'step': 22100, 'epoch': 3} {'type': 'loss', 'content': 0.12112173438072205, 'timestamp': '2025-10-01 04:40:18.686391', 'step': 22101, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:40:18.717358', 'step': 22101, 'epoch': 3} {'type': 'loss', 'content': 0.09801623970270157, 'timestamp': '2025-10-01 04:40:18.719306', 'step': 22102, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:40:18.750193', 'step': 22102, 'epoch': 3} {'type': 'loss', 'content': 0.06585924327373505, 'timestamp': '2025-10-01 04:40:18.754742', 'step': 22103, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:40:18.785149', 'step': 22103, 'epoch': 3} {'type': 'loss', 'content': 0.08758459985256195, 'timestamp': '2025-10-01 04:40:18.809095', 'step': 22104, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:40:18.839682', 'step': 22104, 'epoch': 3} {'type': 'loss', 'content': 0.09310820698738098, 'timestamp': '2025-10-01 04:40:18.841725', 'step': 22105, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:40:18.877045', 'step': 22105, 'epoch': 3} {'type': 'loss', 'content': 0.050919126719236374, 'timestamp': '2025-10-01 04:40:18.879738', 'step': 22106, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:40:18.910711', 'step': 22106, 'epoch': 3} {'type': 'loss', 'content': 0.08755956590175629, 'timestamp': '2025-10-01 04:40:18.913199', 'step': 22107, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:40:18.944685', 'step': 22107, 'epoch': 3} {'type': 'loss', 'content': 0.05815689265727997, 'timestamp': '2025-10-01 04:40:18.968844', 'step': 22108, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:40:18.999709', 'step': 22108, 'epoch': 3} {'type': 'loss', 'content': 0.06375007331371307, 'timestamp': '2025-10-01 04:40:19.002366', 'step': 22109, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:40:19.033877', 'step': 22109, 'epoch': 3} {'type': 'loss', 'content': 0.049427393823862076, 'timestamp': '2025-10-01 04:40:19.036512', 'step': 22110, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:40:19.067505', 'step': 22110, 'epoch': 3} {'type': 'loss', 'content': 0.053753357380628586, 'timestamp': '2025-10-01 04:40:19.069944', 'step': 22111, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:40:19.100218', 'step': 22111, 'epoch': 3} {'type': 'loss', 'content': 0.091530941426754, 'timestamp': '2025-10-01 04:40:19.123915', 'step': 22112, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:40:19.154790', 'step': 22112, 'epoch': 3} {'type': 'loss', 'content': 0.07209793478250504, 'timestamp': '2025-10-01 04:40:19.157479', 'step': 22113, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:40:19.191960', 'step': 22113, 'epoch': 3} {'type': 'loss', 'content': 0.1092248186469078, 'timestamp': '2025-10-01 04:40:19.194534', 'step': 22114, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:40:19.225093', 'step': 22114, 'epoch': 3} {'type': 'loss', 'content': 0.07984558492898941, 'timestamp': '2025-10-01 04:40:19.229110', 'step': 22115, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:40:19.259671', 'step': 22115, 'epoch': 3} {'type': 'loss', 'content': 0.045545488595962524, 'timestamp': '2025-10-01 04:40:19.283784', 'step': 22116, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:40:19.315361', 'step': 22116, 'epoch': 3} {'type': 'loss', 'content': 0.06087996065616608, 'timestamp': '2025-10-01 04:40:19.317578', 'step': 22117, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:40:19.350258', 'step': 22117, 'epoch': 3} {'type': 'loss', 'content': 0.005240469705313444, 'timestamp': '2025-10-01 04:40:19.352889', 'step': 22118, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:40:19.383613', 'step': 22118, 'epoch': 3} {'type': 'loss', 'content': 0.042804084718227386, 'timestamp': '2025-10-01 04:40:19.386123', 'step': 22119, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:40:19.422396', 'step': 22119, 'epoch': 3} {'type': 'loss', 'content': 0.044620852917432785, 'timestamp': '2025-10-01 04:40:19.446387', 'step': 22120, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:40:19.477247', 'step': 22120, 'epoch': 3} {'type': 'loss', 'content': 0.06199207901954651, 'timestamp': '2025-10-01 04:40:19.479523', 'step': 22121, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:40:19.510797', 'step': 22121, 'epoch': 3} {'type': 'loss', 'content': 0.0575861930847168, 'timestamp': '2025-10-01 04:40:19.513286', 'step': 22122, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:40:19.544801', 'step': 22122, 'epoch': 3} {'type': 'loss', 'content': 0.15475206077098846, 'timestamp': '2025-10-01 04:40:19.547413', 'step': 22123, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:40:19.577680', 'step': 22123, 'epoch': 3} {'type': 'loss', 'content': 0.04099220409989357, 'timestamp': '2025-10-01 04:40:19.601795', 'step': 22124, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:40:19.632275', 'step': 22124, 'epoch': 3} {'type': 'loss', 'content': 0.08543112874031067, 'timestamp': '2025-10-01 04:40:19.635623', 'step': 22125, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:40:19.666115', 'step': 22125, 'epoch': 3} {'type': 'loss', 'content': 0.03142918646335602, 'timestamp': '2025-10-01 04:40:19.668302', 'step': 22126, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:40:19.698608', 'step': 22126, 'epoch': 3} {'type': 'loss', 'content': 0.10283543169498444, 'timestamp': '2025-10-01 04:40:19.701020', 'step': 22127, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:40:19.731798', 'step': 22127, 'epoch': 3} {'type': 'loss', 'content': 0.07012249529361725, 'timestamp': '2025-10-01 04:40:19.755631', 'step': 22128, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:40:19.786904', 'step': 22128, 'epoch': 3} {'type': 'loss', 'content': 0.08652132004499435, 'timestamp': '2025-10-01 04:40:19.788914', 'step': 22129, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:40:19.819321', 'step': 22129, 'epoch': 3} {'type': 'loss', 'content': 0.04945310950279236, 'timestamp': '2025-10-01 04:40:19.823498', 'step': 22130, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:40:19.871020', 'step': 22130, 'epoch': 3} {'type': 'loss', 'content': 0.032812707126140594, 'timestamp': '2025-10-01 04:40:19.875761', 'step': 22131, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:40:19.907213', 'step': 22131, 'epoch': 3} {'type': 'loss', 'content': 0.04810485616326332, 'timestamp': '2025-10-01 04:40:19.930949', 'step': 22132, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:40:19.961870', 'step': 22132, 'epoch': 3} {'type': 'loss', 'content': 0.08596108108758926, 'timestamp': '2025-10-01 04:40:19.964354', 'step': 22133, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:40:19.995855', 'step': 22133, 'epoch': 3} {'type': 'loss', 'content': 0.1070810854434967, 'timestamp': '2025-10-01 04:40:19.998550', 'step': 22134, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:40:20.029694', 'step': 22134, 'epoch': 3} {'type': 'loss', 'content': 0.06552844494581223, 'timestamp': '2025-10-01 04:40:20.032396', 'step': 22135, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:40:20.072261', 'step': 22135, 'epoch': 3} {'type': 'loss', 'content': 0.04981185868382454, 'timestamp': '2025-10-01 04:40:20.096613', 'step': 22136, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:40:20.127397', 'step': 22136, 'epoch': 3} {'type': 'loss', 'content': 0.06231915205717087, 'timestamp': '2025-10-01 04:40:20.130219', 'step': 22137, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:40:20.160394', 'step': 22137, 'epoch': 3} {'type': 'loss', 'content': 0.035459112375974655, 'timestamp': '2025-10-01 04:40:20.162382', 'step': 22138, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:40:20.194692', 'step': 22138, 'epoch': 3} {'type': 'loss', 'content': 0.0593671016395092, 'timestamp': '2025-10-01 04:40:20.196772', 'step': 22139, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:40:20.227572', 'step': 22139, 'epoch': 3} {'type': 'loss', 'content': 0.08716793358325958, 'timestamp': '2025-10-01 04:40:20.251736', 'step': 22140, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:40:20.283056', 'step': 22140, 'epoch': 3} {'type': 'loss', 'content': 0.041853971779346466, 'timestamp': '2025-10-01 04:40:20.285595', 'step': 22141, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:40:20.317025', 'step': 22141, 'epoch': 3} {'type': 'loss', 'content': 0.02640734799206257, 'timestamp': '2025-10-01 04:40:20.319847', 'step': 22142, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:40:20.351771', 'step': 22142, 'epoch': 3} {'type': 'loss', 'content': 0.053783152252435684, 'timestamp': '2025-10-01 04:40:20.354352', 'step': 22143, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:40:20.393728', 'step': 22143, 'epoch': 3} {'type': 'loss', 'content': 0.09796573221683502, 'timestamp': '2025-10-01 04:40:20.417498', 'step': 22144, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:40:20.448723', 'step': 22144, 'epoch': 3} {'type': 'loss', 'content': 0.06036005914211273, 'timestamp': '2025-10-01 04:40:20.450845', 'step': 22145, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:40:20.481376', 'step': 22145, 'epoch': 3} {'type': 'loss', 'content': 0.10350511223077774, 'timestamp': '2025-10-01 04:40:20.495161', 'step': 22146, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:40:20.525239', 'step': 22146, 'epoch': 3} {'type': 'loss', 'content': 0.035605885088443756, 'timestamp': '2025-10-01 04:40:20.527357', 'step': 22147, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:40:20.557831', 'step': 22147, 'epoch': 3} {'type': 'loss', 'content': 0.023858992382884026, 'timestamp': '2025-10-01 04:40:20.583343', 'step': 22148, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:40:20.613798', 'step': 22148, 'epoch': 3} {'type': 'loss', 'content': 0.035290736705064774, 'timestamp': '2025-10-01 04:40:20.616452', 'step': 22149, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:40:20.646392', 'step': 22149, 'epoch': 3} {'type': 'loss', 'content': 0.06434319913387299, 'timestamp': '2025-10-01 04:40:20.648475', 'step': 22150, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:40:20.688717', 'step': 22150, 'epoch': 3} {'type': 'loss', 'content': 0.09379592537879944, 'timestamp': '2025-10-01 04:40:20.691027', 'step': 22151, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:40:20.721612', 'step': 22151, 'epoch': 3} {'type': 'loss', 'content': 0.014169425703585148, 'timestamp': '2025-10-01 04:40:20.745481', 'step': 22152, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:40:20.776273', 'step': 22152, 'epoch': 3} {'type': 'loss', 'content': 0.025782380253076553, 'timestamp': '2025-10-01 04:40:20.778309', 'step': 22153, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:40:20.817001', 'step': 22153, 'epoch': 3} {'type': 'loss', 'content': 0.026310374960303307, 'timestamp': '2025-10-01 04:40:20.819077', 'step': 22154, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:40:20.849175', 'step': 22154, 'epoch': 3} {'type': 'loss', 'content': 0.006025961134582758, 'timestamp': '2025-10-01 04:40:20.851145', 'step': 22155, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:40:20.881951', 'step': 22155, 'epoch': 3} {'type': 'loss', 'content': 0.011838392354547977, 'timestamp': '2025-10-01 04:40:20.905555', 'step': 22156, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:40:20.936302', 'step': 22156, 'epoch': 3} {'type': 'loss', 'content': 0.04355355352163315, 'timestamp': '2025-10-01 04:40:20.938320', 'step': 22157, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:40:20.968747', 'step': 22157, 'epoch': 3} {'type': 'loss', 'content': 0.03115912340581417, 'timestamp': '2025-10-01 04:40:20.970754', 'step': 22158, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:40:21.001029', 'step': 22158, 'epoch': 3} {'type': 'loss', 'content': 0.12246549874544144, 'timestamp': '2025-10-01 04:40:21.003072', 'step': 22159, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:40:21.033265', 'step': 22159, 'epoch': 3} {'type': 'loss', 'content': 0.11601672321557999, 'timestamp': '2025-10-01 04:40:21.056830', 'step': 22160, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:40:21.097022', 'step': 22160, 'epoch': 3} {'type': 'loss', 'content': 0.06153639405965805, 'timestamp': '2025-10-01 04:40:21.099207', 'step': 22161, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:40:21.129043', 'step': 22161, 'epoch': 3} {'type': 'loss', 'content': 0.021640773862600327, 'timestamp': '2025-10-01 04:40:21.131151', 'step': 22162, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:40:21.161232', 'step': 22162, 'epoch': 3} {'type': 'loss', 'content': 0.1668146401643753, 'timestamp': '2025-10-01 04:40:21.163179', 'step': 22163, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:40:21.198932', 'step': 22163, 'epoch': 3} {'type': 'loss', 'content': 0.06020333617925644, 'timestamp': '2025-10-01 04:40:21.222469', 'step': 22164, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:40:21.252196', 'step': 22164, 'epoch': 3} {'type': 'loss', 'content': 0.07096917182207108, 'timestamp': '2025-10-01 04:40:21.254372', 'step': 22165, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:40:21.284359', 'step': 22165, 'epoch': 3} {'type': 'loss', 'content': 0.06571752578020096, 'timestamp': '2025-10-01 04:40:21.286590', 'step': 22166, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:40:21.319427', 'step': 22166, 'epoch': 3} {'type': 'loss', 'content': 0.014381305314600468, 'timestamp': '2025-10-01 04:40:21.321555', 'step': 22167, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:40:21.351856', 'step': 22167, 'epoch': 3} {'type': 'loss', 'content': 0.09767939895391464, 'timestamp': '2025-10-01 04:40:21.375399', 'step': 22168, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:40:21.405970', 'step': 22168, 'epoch': 3} {'type': 'loss', 'content': 0.05125793442130089, 'timestamp': '2025-10-01 04:40:21.408068', 'step': 22169, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:40:21.438048', 'step': 22169, 'epoch': 3} {'type': 'loss', 'content': 0.05480557307600975, 'timestamp': '2025-10-01 04:40:21.449505', 'step': 22170, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:40:21.479575', 'step': 22170, 'epoch': 3} {'type': 'loss', 'content': 0.014328702352941036, 'timestamp': '2025-10-01 04:40:21.482778', 'step': 22171, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:40:21.513327', 'step': 22171, 'epoch': 3} {'type': 'loss', 'content': 0.09355083107948303, 'timestamp': '2025-10-01 04:40:21.537088', 'step': 22172, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:40:21.567439', 'step': 22172, 'epoch': 3} {'type': 'loss', 'content': 0.11421816051006317, 'timestamp': '2025-10-01 04:40:21.569575', 'step': 22173, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:40:21.603599', 'step': 22173, 'epoch': 3} {'type': 'loss', 'content': 0.07814661413431168, 'timestamp': '2025-10-01 04:40:21.606515', 'step': 22174, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:40:21.637741', 'step': 22174, 'epoch': 3} {'type': 'loss', 'content': 0.04346584901213646, 'timestamp': '2025-10-01 04:40:21.639769', 'step': 22175, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:40:21.671189', 'step': 22175, 'epoch': 3} {'type': 'loss', 'content': 0.07700221240520477, 'timestamp': '2025-10-01 04:40:21.694762', 'step': 22176, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:40:21.725228', 'step': 22176, 'epoch': 3} {'type': 'loss', 'content': 0.01978539302945137, 'timestamp': '2025-10-01 04:40:21.727302', 'step': 22177, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:40:21.756926', 'step': 22177, 'epoch': 3} {'type': 'loss', 'content': 0.04744784161448479, 'timestamp': '2025-10-01 04:40:21.759056', 'step': 22178, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:40:21.788832', 'step': 22178, 'epoch': 3} {'type': 'loss', 'content': 0.15488940477371216, 'timestamp': '2025-10-01 04:40:21.790857', 'step': 22179, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:40:21.820946', 'step': 22179, 'epoch': 3} {'type': 'loss', 'content': 0.08688636124134064, 'timestamp': '2025-10-01 04:40:21.844616', 'step': 22180, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:40:21.875564', 'step': 22180, 'epoch': 3} {'type': 'loss', 'content': 0.1333921253681183, 'timestamp': '2025-10-01 04:40:21.877766', 'step': 22181, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:40:21.911524', 'step': 22181, 'epoch': 3} {'type': 'loss', 'content': 0.0435355119407177, 'timestamp': '2025-10-01 04:40:21.914301', 'step': 22182, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:40:21.944564', 'step': 22182, 'epoch': 3} {'type': 'loss', 'content': 0.025961242616176605, 'timestamp': '2025-10-01 04:40:21.947572', 'step': 22183, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:40:21.979022', 'step': 22183, 'epoch': 3} {'type': 'loss', 'content': 0.0715656578540802, 'timestamp': '2025-10-01 04:40:22.002653', 'step': 22184, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:40:22.032745', 'step': 22184, 'epoch': 3} {'type': 'loss', 'content': 0.11551696062088013, 'timestamp': '2025-10-01 04:40:22.034815', 'step': 22185, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:40:22.065019', 'step': 22185, 'epoch': 3} {'type': 'loss', 'content': 0.08472079038619995, 'timestamp': '2025-10-01 04:40:22.067080', 'step': 22186, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:40:22.097337', 'step': 22186, 'epoch': 3} {'type': 'loss', 'content': 0.020571064203977585, 'timestamp': '2025-10-01 04:40:22.099500', 'step': 22187, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:40:22.129341', 'step': 22187, 'epoch': 3} {'type': 'loss', 'content': 0.09097381681203842, 'timestamp': '2025-10-01 04:40:22.152955', 'step': 22188, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:40:22.182456', 'step': 22188, 'epoch': 3} {'type': 'loss', 'content': 0.05899824947118759, 'timestamp': '2025-10-01 04:40:22.184471', 'step': 22189, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:40:22.226688', 'step': 22189, 'epoch': 3} {'type': 'loss', 'content': 0.042868971824645996, 'timestamp': '2025-10-01 04:40:22.228776', 'step': 22190, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:40:22.259389', 'step': 22190, 'epoch': 3} {'type': 'loss', 'content': 0.16703027486801147, 'timestamp': '2025-10-01 04:40:22.261514', 'step': 22191, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:40:22.291910', 'step': 22191, 'epoch': 3} {'type': 'loss', 'content': 0.07365226745605469, 'timestamp': '2025-10-01 04:40:22.315587', 'step': 22192, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:40:22.352024', 'step': 22192, 'epoch': 3} {'type': 'loss', 'content': 0.05208462104201317, 'timestamp': '2025-10-01 04:40:22.354051', 'step': 22193, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:40:22.383706', 'step': 22193, 'epoch': 3} {'type': 'loss', 'content': 0.025287244468927383, 'timestamp': '2025-10-01 04:40:22.386027', 'step': 22194, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:40:22.417400', 'step': 22194, 'epoch': 3} {'type': 'loss', 'content': 0.05648535490036011, 'timestamp': '2025-10-01 04:40:22.419863', 'step': 22195, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:40:22.449756', 'step': 22195, 'epoch': 3} {'type': 'loss', 'content': 0.09212140738964081, 'timestamp': '2025-10-01 04:40:22.473335', 'step': 22196, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:40:22.505445', 'step': 22196, 'epoch': 3} {'type': 'loss', 'content': 0.027104990556836128, 'timestamp': '2025-10-01 04:40:22.516926', 'step': 22197, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:40:22.547438', 'step': 22197, 'epoch': 3} {'type': 'loss', 'content': 0.09527528285980225, 'timestamp': '2025-10-01 04:40:22.549692', 'step': 22198, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:40:22.580224', 'step': 22198, 'epoch': 3} {'type': 'loss', 'content': 0.02933644875884056, 'timestamp': '2025-10-01 04:40:22.582297', 'step': 22199, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:40:22.612354', 'step': 22199, 'epoch': 3} {'type': 'loss', 'content': 0.018358798697590828, 'timestamp': '2025-10-01 04:40:22.635917', 'step': 22200, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:40:22.665924', 'step': 22200, 'epoch': 3} {'type': 'loss', 'content': 0.11981070786714554, 'timestamp': '2025-10-01 04:40:22.668096', 'step': 22201, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:40:22.697973', 'step': 22201, 'epoch': 3} {'type': 'loss', 'content': 0.04239271953701973, 'timestamp': '2025-10-01 04:40:22.700516', 'step': 22202, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:40:22.731066', 'step': 22202, 'epoch': 3} {'type': 'loss', 'content': 0.15818361937999725, 'timestamp': '2025-10-01 04:40:22.733115', 'step': 22203, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:40:22.765368', 'step': 22203, 'epoch': 3} {'type': 'loss', 'content': 0.0010974829783663154, 'timestamp': '2025-10-01 04:40:22.793055', 'step': 22204, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:40:22.823894', 'step': 22204, 'epoch': 3} {'type': 'loss', 'content': 0.013673610985279083, 'timestamp': '2025-10-01 04:40:22.826197', 'step': 22205, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:40:22.858582', 'step': 22205, 'epoch': 3} {'type': 'loss', 'content': 0.05541601777076721, 'timestamp': '2025-10-01 04:40:22.861409', 'step': 22206, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:40:22.893346', 'step': 22206, 'epoch': 3} {'type': 'loss', 'content': 0.06067023426294327, 'timestamp': '2025-10-01 04:40:22.895641', 'step': 22207, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:40:22.925855', 'step': 22207, 'epoch': 3} {'type': 'loss', 'content': 0.05675022676587105, 'timestamp': '2025-10-01 04:40:22.949372', 'step': 22208, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:40:22.979516', 'step': 22208, 'epoch': 3} {'type': 'loss', 'content': 0.04832017049193382, 'timestamp': '2025-10-01 04:40:22.981890', 'step': 22209, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:40:23.012323', 'step': 22209, 'epoch': 3} {'type': 'loss', 'content': 0.04902581498026848, 'timestamp': '2025-10-01 04:40:23.014578', 'step': 22210, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:40:23.077682', 'step': 22210, 'epoch': 3} {'type': 'loss', 'content': 0.06595724821090698, 'timestamp': '2025-10-01 04:40:23.079703', 'step': 22211, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:40:23.109519', 'step': 22211, 'epoch': 3} {'type': 'loss', 'content': 0.023779364302754402, 'timestamp': '2025-10-01 04:40:23.133118', 'step': 22212, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:40:23.162758', 'step': 22212, 'epoch': 3} {'type': 'loss', 'content': 0.07503272593021393, 'timestamp': '2025-10-01 04:40:23.164760', 'step': 22213, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:40:23.194857', 'step': 22213, 'epoch': 3} {'type': 'loss', 'content': 0.031176285818219185, 'timestamp': '2025-10-01 04:40:23.196845', 'step': 22214, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:40:23.227029', 'step': 22214, 'epoch': 3} {'type': 'loss', 'content': 0.0352117195725441, 'timestamp': '2025-10-01 04:40:23.229150', 'step': 22215, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:40:23.259745', 'step': 22215, 'epoch': 3} {'type': 'loss', 'content': 0.06893830746412277, 'timestamp': '2025-10-01 04:40:23.283438', 'step': 22216, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:40:23.313467', 'step': 22216, 'epoch': 3} {'type': 'loss', 'content': 0.05984271317720413, 'timestamp': '2025-10-01 04:40:23.315967', 'step': 22217, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:40:23.345897', 'step': 22217, 'epoch': 3} {'type': 'loss', 'content': 0.12492357939481735, 'timestamp': '2025-10-01 04:40:23.347802', 'step': 22218, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:40:23.378046', 'step': 22218, 'epoch': 3} {'type': 'loss', 'content': 0.03604818880558014, 'timestamp': '2025-10-01 04:40:23.380211', 'step': 22219, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:40:23.410198', 'step': 22219, 'epoch': 3} {'type': 'loss', 'content': 0.028694624081254005, 'timestamp': '2025-10-01 04:40:23.433593', 'step': 22220, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:40:23.463552', 'step': 22220, 'epoch': 3} {'type': 'loss', 'content': 0.1213570162653923, 'timestamp': '2025-10-01 04:40:23.466068', 'step': 22221, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:40:23.495849', 'step': 22221, 'epoch': 3} {'type': 'loss', 'content': 0.0945456400513649, 'timestamp': '2025-10-01 04:40:23.498002', 'step': 22222, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:40:23.528171', 'step': 22222, 'epoch': 3} {'type': 'loss', 'content': 0.0730123221874237, 'timestamp': '2025-10-01 04:40:23.530582', 'step': 22223, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:40:23.561550', 'step': 22223, 'epoch': 3} {'type': 'loss', 'content': 0.026182090863585472, 'timestamp': '2025-10-01 04:40:23.585220', 'step': 22224, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:40:23.616465', 'step': 22224, 'epoch': 3} {'type': 'loss', 'content': 0.029736187309026718, 'timestamp': '2025-10-01 04:40:23.618540', 'step': 22225, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:40:23.649343', 'step': 22225, 'epoch': 3} {'type': 'loss', 'content': 0.06797602027654648, 'timestamp': '2025-10-01 04:40:23.658674', 'step': 22226, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:40:23.690175', 'step': 22226, 'epoch': 3} {'type': 'loss', 'content': 0.0648108571767807, 'timestamp': '2025-10-01 04:40:23.692300', 'step': 22227, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:40:23.724479', 'step': 22227, 'epoch': 3} {'type': 'loss', 'content': 0.06874479353427887, 'timestamp': '2025-10-01 04:40:23.748008', 'step': 22228, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:40:23.777844', 'step': 22228, 'epoch': 3} {'type': 'loss', 'content': 0.04192127659916878, 'timestamp': '2025-10-01 04:40:23.780243', 'step': 22229, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:40:23.810384', 'step': 22229, 'epoch': 3} {'type': 'loss', 'content': 0.05927899479866028, 'timestamp': '2025-10-01 04:40:23.812365', 'step': 22230, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:40:23.843281', 'step': 22230, 'epoch': 3} {'type': 'loss', 'content': 0.04290496185421944, 'timestamp': '2025-10-01 04:40:23.845300', 'step': 22231, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:40:23.875935', 'step': 22231, 'epoch': 3} {'type': 'loss', 'content': 0.07284296303987503, 'timestamp': '2025-10-01 04:40:23.899423', 'step': 22232, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:40:23.930577', 'step': 22232, 'epoch': 3} {'type': 'loss', 'content': 0.0680357962846756, 'timestamp': '2025-10-01 04:40:23.935655', 'step': 22233, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:40:23.966778', 'step': 22233, 'epoch': 3} {'type': 'loss', 'content': 0.0597202368080616, 'timestamp': '2025-10-01 04:40:23.969297', 'step': 22234, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:40:24.000541', 'step': 22234, 'epoch': 3} {'type': 'loss', 'content': 0.07911935448646545, 'timestamp': '2025-10-01 04:40:24.002635', 'step': 22235, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:40:24.036680', 'step': 22235, 'epoch': 3} {'type': 'loss', 'content': 0.06487582623958588, 'timestamp': '2025-10-01 04:40:24.060357', 'step': 22236, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:40:24.091479', 'step': 22236, 'epoch': 3} {'type': 'loss', 'content': 0.06546150892972946, 'timestamp': '2025-10-01 04:40:24.095087', 'step': 22237, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:40:24.127410', 'step': 22237, 'epoch': 3} {'type': 'loss', 'content': 0.07458657771348953, 'timestamp': '2025-10-01 04:40:24.129955', 'step': 22238, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:40:24.161821', 'step': 22238, 'epoch': 3} {'type': 'loss', 'content': 0.043014705181121826, 'timestamp': '2025-10-01 04:40:24.164532', 'step': 22239, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:40:24.196247', 'step': 22239, 'epoch': 3} {'type': 'loss', 'content': 0.08410739153623581, 'timestamp': '2025-10-01 04:40:24.219865', 'step': 22240, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:40:24.260804', 'step': 22240, 'epoch': 3} {'type': 'loss', 'content': 0.15766698122024536, 'timestamp': '2025-10-01 04:40:24.263077', 'step': 22241, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:40:24.307512', 'step': 22241, 'epoch': 3} {'type': 'loss', 'content': 0.07829392701387405, 'timestamp': '2025-10-01 04:40:24.309812', 'step': 22242, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:40:24.341763', 'step': 22242, 'epoch': 3} {'type': 'loss', 'content': 0.07074946910142899, 'timestamp': '2025-10-01 04:40:24.343833', 'step': 22243, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:40:24.374642', 'step': 22243, 'epoch': 3} {'type': 'loss', 'content': 0.020946849137544632, 'timestamp': '2025-10-01 04:40:24.398516', 'step': 22244, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:40:24.429749', 'step': 22244, 'epoch': 3} {'type': 'loss', 'content': 0.07334775477647781, 'timestamp': '2025-10-01 04:40:24.432309', 'step': 22245, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:40:24.462310', 'step': 22245, 'epoch': 3} {'type': 'loss', 'content': 0.05292297527194023, 'timestamp': '2025-10-01 04:40:24.466897', 'step': 22246, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:40:24.498136', 'step': 22246, 'epoch': 3} {'type': 'loss', 'content': 0.060928840190172195, 'timestamp': '2025-10-01 04:40:24.500220', 'step': 22247, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:40:24.530933', 'step': 22247, 'epoch': 3} {'type': 'loss', 'content': 0.054075464606285095, 'timestamp': '2025-10-01 04:40:24.554651', 'step': 22248, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:40:24.585615', 'step': 22248, 'epoch': 3} {'type': 'loss', 'content': 0.04326886311173439, 'timestamp': '2025-10-01 04:40:24.587884', 'step': 22249, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:40:24.618564', 'step': 22249, 'epoch': 3} {'type': 'loss', 'content': 0.13664039969444275, 'timestamp': '2025-10-01 04:40:24.620727', 'step': 22250, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:40:24.652971', 'step': 22250, 'epoch': 3} {'type': 'loss', 'content': 0.027448061853647232, 'timestamp': '2025-10-01 04:40:24.655025', 'step': 22251, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:40:24.685590', 'step': 22251, 'epoch': 3} {'type': 'loss', 'content': 0.08661293983459473, 'timestamp': '2025-10-01 04:40:24.709340', 'step': 22252, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:40:24.740364', 'step': 22252, 'epoch': 3} {'type': 'loss', 'content': 0.05641169100999832, 'timestamp': '2025-10-01 04:40:24.743296', 'step': 22253, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:40:24.773693', 'step': 22253, 'epoch': 3} {'type': 'loss', 'content': 0.016081182286143303, 'timestamp': '2025-10-01 04:40:24.776020', 'step': 22254, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:40:24.808533', 'step': 22254, 'epoch': 3} {'type': 'loss', 'content': 0.06664452701807022, 'timestamp': '2025-10-01 04:40:24.810701', 'step': 22255, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:40:24.842070', 'step': 22255, 'epoch': 3} {'type': 'loss', 'content': 0.04538445174694061, 'timestamp': '2025-10-01 04:40:24.865732', 'step': 22256, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:40:24.896941', 'step': 22256, 'epoch': 3} {'type': 'loss', 'content': 0.0898309201002121, 'timestamp': '2025-10-01 04:40:24.899429', 'step': 22257, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:40:24.930677', 'step': 22257, 'epoch': 3} {'type': 'loss', 'content': 0.029238028451800346, 'timestamp': '2025-10-01 04:40:24.932543', 'step': 22258, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:40:24.966229', 'step': 22258, 'epoch': 3} {'type': 'loss', 'content': 0.05226777121424675, 'timestamp': '2025-10-01 04:40:24.969258', 'step': 22259, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:40:25.000786', 'step': 22259, 'epoch': 3} {'type': 'loss', 'content': 0.0747339129447937, 'timestamp': '2025-10-01 04:40:25.024288', 'step': 22260, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:40:25.055882', 'step': 22260, 'epoch': 3} {'type': 'loss', 'content': 0.11237255483865738, 'timestamp': '2025-10-01 04:40:25.058497', 'step': 22261, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:40:25.089836', 'step': 22261, 'epoch': 3} {'type': 'loss', 'content': 0.05995526537299156, 'timestamp': '2025-10-01 04:40:25.092328', 'step': 22262, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:40:25.124668', 'step': 22262, 'epoch': 3} {'type': 'loss', 'content': 0.025742951780557632, 'timestamp': '2025-10-01 04:40:25.134853', 'step': 22263, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:40:25.168206', 'step': 22263, 'epoch': 3} {'type': 'loss', 'content': 0.07759542018175125, 'timestamp': '2025-10-01 04:40:25.191834', 'step': 22264, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:40:25.222893', 'step': 22264, 'epoch': 3} {'type': 'loss', 'content': 0.04300147667527199, 'timestamp': '2025-10-01 04:40:25.225383', 'step': 22265, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:40:25.256130', 'step': 22265, 'epoch': 3} {'type': 'loss', 'content': 0.03669575974345207, 'timestamp': '2025-10-01 04:40:25.258290', 'step': 22266, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:40:25.292089', 'step': 22266, 'epoch': 3} {'type': 'loss', 'content': 0.05269990488886833, 'timestamp': '2025-10-01 04:40:25.294803', 'step': 22267, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:40:25.326627', 'step': 22267, 'epoch': 3} {'type': 'loss', 'content': 0.0771518126130104, 'timestamp': '2025-10-01 04:40:25.350129', 'step': 22268, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:40:25.382030', 'step': 22268, 'epoch': 3} {'type': 'loss', 'content': 0.011101304553449154, 'timestamp': '2025-10-01 04:40:25.384211', 'step': 22269, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:40:25.433738', 'step': 22269, 'epoch': 3} {'type': 'loss', 'content': 0.0655231773853302, 'timestamp': '2025-10-01 04:40:25.436186', 'step': 22270, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:40:25.467219', 'step': 22270, 'epoch': 3} {'type': 'loss', 'content': 0.07211808115243912, 'timestamp': '2025-10-01 04:40:25.469227', 'step': 22271, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:40:25.502730', 'step': 22271, 'epoch': 3} {'type': 'loss', 'content': 0.1020217016339302, 'timestamp': '2025-10-01 04:40:25.531494', 'step': 22272, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:40:25.561174', 'step': 22272, 'epoch': 3} {'type': 'loss', 'content': 0.03840739652514458, 'timestamp': '2025-10-01 04:40:25.563213', 'step': 22273, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:40:25.599305', 'step': 22273, 'epoch': 3} {'type': 'loss', 'content': 0.04226021096110344, 'timestamp': '2025-10-01 04:40:25.601375', 'step': 22274, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:40:25.639421', 'step': 22274, 'epoch': 3} {'type': 'loss', 'content': 0.034630101174116135, 'timestamp': '2025-10-01 04:40:25.641736', 'step': 22275, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:40:25.671951', 'step': 22275, 'epoch': 3} {'type': 'loss', 'content': 0.06425397843122482, 'timestamp': '2025-10-01 04:40:25.695518', 'step': 22276, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:40:25.726104', 'step': 22276, 'epoch': 3} {'type': 'loss', 'content': 0.14547964930534363, 'timestamp': '2025-10-01 04:40:25.728277', 'step': 22277, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:40:25.759168', 'step': 22277, 'epoch': 3} {'type': 'loss', 'content': 0.037392474710941315, 'timestamp': '2025-10-01 04:40:25.761596', 'step': 22278, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:40:25.792533', 'step': 22278, 'epoch': 3} {'type': 'loss', 'content': 0.06434110552072525, 'timestamp': '2025-10-01 04:40:25.795313', 'step': 22279, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:40:25.825406', 'step': 22279, 'epoch': 3} {'type': 'loss', 'content': 0.04411407932639122, 'timestamp': '2025-10-01 04:40:25.848941', 'step': 22280, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:40:25.880625', 'step': 22280, 'epoch': 3} {'type': 'loss', 'content': 0.053905416280031204, 'timestamp': '2025-10-01 04:40:25.883286', 'step': 22281, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:40:25.914127', 'step': 22281, 'epoch': 3} {'type': 'loss', 'content': 0.07372907549142838, 'timestamp': '2025-10-01 04:40:25.916288', 'step': 22282, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:40:25.946053', 'step': 22282, 'epoch': 3} {'type': 'loss', 'content': 0.008746457286179066, 'timestamp': '2025-10-01 04:40:25.948634', 'step': 22283, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:40:25.980314', 'step': 22283, 'epoch': 3} {'type': 'loss', 'content': 0.04892989248037338, 'timestamp': '2025-10-01 04:40:26.005673', 'step': 22284, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:40:26.035788', 'step': 22284, 'epoch': 3} {'type': 'loss', 'content': 0.09148772060871124, 'timestamp': '2025-10-01 04:40:26.037919', 'step': 22285, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:40:26.067499', 'step': 22285, 'epoch': 3} {'type': 'loss', 'content': 0.025357767939567566, 'timestamp': '2025-10-01 04:40:26.070223', 'step': 22286, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:40:26.100171', 'step': 22286, 'epoch': 3} {'type': 'loss', 'content': 0.016434120014309883, 'timestamp': '2025-10-01 04:40:26.102724', 'step': 22287, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:40:26.132303', 'step': 22287, 'epoch': 3} {'type': 'loss', 'content': 0.11572786420583725, 'timestamp': '2025-10-01 04:40:26.155858', 'step': 22288, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:40:26.186295', 'step': 22288, 'epoch': 3} {'type': 'loss', 'content': 0.06408283114433289, 'timestamp': '2025-10-01 04:40:26.188301', 'step': 22289, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:40:26.218363', 'step': 22289, 'epoch': 3} {'type': 'loss', 'content': 0.10142744332551956, 'timestamp': '2025-10-01 04:40:26.220483', 'step': 22290, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-10-01 04:40:26.250670', 'step': 22290, 'epoch': 3} {'type': 'loss', 'content': 0.026018580421805382, 'timestamp': '2025-10-01 04:40:26.255017', 'step': 22291, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:40:26.297881', 'step': 22291, 'epoch': 3} {'type': 'loss', 'content': 0.10708530992269516, 'timestamp': '2025-10-01 04:40:26.321571', 'step': 22292, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:40:26.353596', 'step': 22292, 'epoch': 3} {'type': 'loss', 'content': 0.14701713621616364, 'timestamp': '2025-10-01 04:40:26.355742', 'step': 22293, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:40:26.388650', 'step': 22293, 'epoch': 3} {'type': 'loss', 'content': 0.040738966315984726, 'timestamp': '2025-10-01 04:40:26.390962', 'step': 22294, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:40:26.422203', 'step': 22294, 'epoch': 3} {'type': 'loss', 'content': 0.052372731268405914, 'timestamp': '2025-10-01 04:40:26.424637', 'step': 22295, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:40:26.455189', 'step': 22295, 'epoch': 3} {'type': 'loss', 'content': 0.09075597673654556, 'timestamp': '2025-10-01 04:40:26.479185', 'step': 22296, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:40:26.511942', 'step': 22296, 'epoch': 3} {'type': 'loss', 'content': 0.06263943761587143, 'timestamp': '2025-10-01 04:40:26.513988', 'step': 22297, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:40:26.543681', 'step': 22297, 'epoch': 3} {'type': 'loss', 'content': 0.03684661537408829, 'timestamp': '2025-10-01 04:40:26.545633', 'step': 22298, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:40:26.576150', 'step': 22298, 'epoch': 3} {'type': 'loss', 'content': 0.022914860397577286, 'timestamp': '2025-10-01 04:40:26.578065', 'step': 22299, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:40:26.617034', 'step': 22299, 'epoch': 3} {'type': 'loss', 'content': 0.0433732271194458, 'timestamp': '2025-10-01 04:40:26.640913', 'step': 22300, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:40:26.670666', 'step': 22300, 'epoch': 3} {'type': 'loss', 'content': 0.11405479162931442, 'timestamp': '2025-10-01 04:40:26.672548', 'step': 22301, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:40:26.702367', 'step': 22301, 'epoch': 3} {'type': 'loss', 'content': 0.047256264835596085, 'timestamp': '2025-10-01 04:40:26.704334', 'step': 22302, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:40:26.748403', 'step': 22302, 'epoch': 3} {'type': 'loss', 'content': 0.058688800781965256, 'timestamp': '2025-10-01 04:40:26.750495', 'step': 22303, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:40:26.780458', 'step': 22303, 'epoch': 3} {'type': 'loss', 'content': 0.09329766035079956, 'timestamp': '2025-10-01 04:40:26.804202', 'step': 22304, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:40:26.834974', 'step': 22304, 'epoch': 3} {'type': 'loss', 'content': 0.06992960721254349, 'timestamp': '2025-10-01 04:40:26.837044', 'step': 22305, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:40:26.867622', 'step': 22305, 'epoch': 3} {'type': 'loss', 'content': 0.06276485323905945, 'timestamp': '2025-10-01 04:40:26.870262', 'step': 22306, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:40:26.911798', 'step': 22306, 'epoch': 3} {'type': 'loss', 'content': 0.007925888523459435, 'timestamp': '2025-10-01 04:40:26.915459', 'step': 22307, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:40:26.947310', 'step': 22307, 'epoch': 3} {'type': 'loss', 'content': 0.04770068824291229, 'timestamp': '2025-10-01 04:40:26.970857', 'step': 22308, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:40:27.003029', 'step': 22308, 'epoch': 3} {'type': 'loss', 'content': 0.06365402787923813, 'timestamp': '2025-10-01 04:40:27.005236', 'step': 22309, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:40:27.036696', 'step': 22309, 'epoch': 3} {'type': 'loss', 'content': 0.02858726494014263, 'timestamp': '2025-10-01 04:40:27.039225', 'step': 22310, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:40:27.069658', 'step': 22310, 'epoch': 3} {'type': 'loss', 'content': 0.09842228889465332, 'timestamp': '2025-10-01 04:40:27.071893', 'step': 22311, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:40:27.101879', 'step': 22311, 'epoch': 3} {'type': 'loss', 'content': 0.08846786618232727, 'timestamp': '2025-10-01 04:40:27.125731', 'step': 22312, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:40:27.157148', 'step': 22312, 'epoch': 3} {'type': 'loss', 'content': 0.03867114707827568, 'timestamp': '2025-10-01 04:40:27.159533', 'step': 22313, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:40:27.196836', 'step': 22313, 'epoch': 3} {'type': 'loss', 'content': 0.09139607101678848, 'timestamp': '2025-10-01 04:40:27.199236', 'step': 22314, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:40:27.229556', 'step': 22314, 'epoch': 3} {'type': 'loss', 'content': 0.031396057456731796, 'timestamp': '2025-10-01 04:40:27.231745', 'step': 22315, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:40:27.262204', 'step': 22315, 'epoch': 3} {'type': 'loss', 'content': 0.026888806372880936, 'timestamp': '2025-10-01 04:40:27.285804', 'step': 22316, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:40:27.315979', 'step': 22316, 'epoch': 3} {'type': 'loss', 'content': 0.053057752549648285, 'timestamp': '2025-10-01 04:40:27.318169', 'step': 22317, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:40:27.349277', 'step': 22317, 'epoch': 3} {'type': 'loss', 'content': 0.03812966123223305, 'timestamp': '2025-10-01 04:40:27.351570', 'step': 22318, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:40:27.390566', 'step': 22318, 'epoch': 3} {'type': 'loss', 'content': 0.023755289614200592, 'timestamp': '2025-10-01 04:40:27.392890', 'step': 22319, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:40:27.423617', 'step': 22319, 'epoch': 3} {'type': 'loss', 'content': 0.030062485486268997, 'timestamp': '2025-10-01 04:40:27.447378', 'step': 22320, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:40:27.478236', 'step': 22320, 'epoch': 3} {'type': 'loss', 'content': 0.13555888831615448, 'timestamp': '2025-10-01 04:40:27.480592', 'step': 22321, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:40:27.512047', 'step': 22321, 'epoch': 3} {'type': 'loss', 'content': 0.08956827223300934, 'timestamp': '2025-10-01 04:40:27.514483', 'step': 22322, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:40:27.546031', 'step': 22322, 'epoch': 3} {'type': 'loss', 'content': 0.05894508585333824, 'timestamp': '2025-10-01 04:40:27.548301', 'step': 22323, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:40:27.579053', 'step': 22323, 'epoch': 3} {'type': 'loss', 'content': 0.08345624059438705, 'timestamp': '2025-10-01 04:40:27.603074', 'step': 22324, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:40:27.633791', 'step': 22324, 'epoch': 3} {'type': 'loss', 'content': 0.034758880734443665, 'timestamp': '2025-10-01 04:40:27.636027', 'step': 22325, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:40:27.666596', 'step': 22325, 'epoch': 3} {'type': 'loss', 'content': 0.05514072626829147, 'timestamp': '2025-10-01 04:40:27.668651', 'step': 22326, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:40:27.699023', 'step': 22326, 'epoch': 3} {'type': 'loss', 'content': 0.05556788668036461, 'timestamp': '2025-10-01 04:40:27.701175', 'step': 22327, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:40:27.731812', 'step': 22327, 'epoch': 3} {'type': 'loss', 'content': 0.17088274657726288, 'timestamp': '2025-10-01 04:40:27.755655', 'step': 22328, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:40:27.785928', 'step': 22328, 'epoch': 3} {'type': 'loss', 'content': 0.12388987094163895, 'timestamp': '2025-10-01 04:40:27.788144', 'step': 22329, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:40:27.818332', 'step': 22329, 'epoch': 3} {'type': 'loss', 'content': 0.019281061366200447, 'timestamp': '2025-10-01 04:40:27.820763', 'step': 22330, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:40:27.852099', 'step': 22330, 'epoch': 3} {'type': 'loss', 'content': 0.011860212311148643, 'timestamp': '2025-10-01 04:40:27.854285', 'step': 22331, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:40:27.885161', 'step': 22331, 'epoch': 3} {'type': 'loss', 'content': 0.05091407522559166, 'timestamp': '2025-10-01 04:40:27.909647', 'step': 22332, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:40:27.940022', 'step': 22332, 'epoch': 3} {'type': 'loss', 'content': 0.04895247146487236, 'timestamp': '2025-10-01 04:40:27.942351', 'step': 22333, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:40:27.973056', 'step': 22333, 'epoch': 3} {'type': 'loss', 'content': 0.03796488419175148, 'timestamp': '2025-10-01 04:40:27.975450', 'step': 22334, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:40:28.007068', 'step': 22334, 'epoch': 3} {'type': 'loss', 'content': 0.061292123049497604, 'timestamp': '2025-10-01 04:40:28.009312', 'step': 22335, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:40:28.039896', 'step': 22335, 'epoch': 3} {'type': 'loss', 'content': 0.026179805397987366, 'timestamp': '2025-10-01 04:40:28.063527', 'step': 22336, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:40:28.104602', 'step': 22336, 'epoch': 3} {'type': 'loss', 'content': 0.1007571816444397, 'timestamp': '2025-10-01 04:40:28.107018', 'step': 22337, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:40:28.137404', 'step': 22337, 'epoch': 3} {'type': 'loss', 'content': 0.02857297845184803, 'timestamp': '2025-10-01 04:40:28.139741', 'step': 22338, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:40:28.171775', 'step': 22338, 'epoch': 3} {'type': 'loss', 'content': 0.11262022703886032, 'timestamp': '2025-10-01 04:40:28.175414', 'step': 22339, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:40:28.207419', 'step': 22339, 'epoch': 3} {'type': 'loss', 'content': 0.07387585192918777, 'timestamp': '2025-10-01 04:40:28.231321', 'step': 22340, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:40:28.262474', 'step': 22340, 'epoch': 3} {'type': 'loss', 'content': 0.009353694505989552, 'timestamp': '2025-10-01 04:40:28.265498', 'step': 22341, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:40:28.298332', 'step': 22341, 'epoch': 3} {'type': 'loss', 'content': 0.07669162005186081, 'timestamp': '2025-10-01 04:40:28.300971', 'step': 22342, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:40:28.332049', 'step': 22342, 'epoch': 3} {'type': 'loss', 'content': 0.0454106330871582, 'timestamp': '2025-10-01 04:40:28.334475', 'step': 22343, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:40:28.365460', 'step': 22343, 'epoch': 3} {'type': 'loss', 'content': 0.04702442139387131, 'timestamp': '2025-10-01 04:40:28.389549', 'step': 22344, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:40:28.419921', 'step': 22344, 'epoch': 3} {'type': 'loss', 'content': 0.1043553575873375, 'timestamp': '2025-10-01 04:40:28.421989', 'step': 22345, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:40:28.453056', 'step': 22345, 'epoch': 3} {'type': 'loss', 'content': 0.012151437811553478, 'timestamp': '2025-10-01 04:40:28.455138', 'step': 22346, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:40:28.489612', 'step': 22346, 'epoch': 3} {'type': 'loss', 'content': 0.039967432618141174, 'timestamp': '2025-10-01 04:40:28.491578', 'step': 22347, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:40:28.521853', 'step': 22347, 'epoch': 3} {'type': 'loss', 'content': 0.04295502230525017, 'timestamp': '2025-10-01 04:40:28.545667', 'step': 22348, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:40:28.576405', 'step': 22348, 'epoch': 3} {'type': 'loss', 'content': 0.0900145024061203, 'timestamp': '2025-10-01 04:40:28.579063', 'step': 22349, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:40:28.612798', 'step': 22349, 'epoch': 3} {'type': 'loss', 'content': 0.07860998809337616, 'timestamp': '2025-10-01 04:40:28.614906', 'step': 22350, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:40:28.647126', 'step': 22350, 'epoch': 3} {'type': 'loss', 'content': 0.10156190395355225, 'timestamp': '2025-10-01 04:40:28.649144', 'step': 22351, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:40:28.679582', 'step': 22351, 'epoch': 3} {'type': 'loss', 'content': 0.09534840285778046, 'timestamp': '2025-10-01 04:40:28.703631', 'step': 22352, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:40:28.734596', 'step': 22352, 'epoch': 3} {'type': 'loss', 'content': 0.059767574071884155, 'timestamp': '2025-10-01 04:40:28.736781', 'step': 22353, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-10-01 04:40:28.769797', 'step': 22353, 'epoch': 3} {'type': 'loss', 'content': 0.018857726827263832, 'timestamp': '2025-10-01 04:40:28.772060', 'step': 22354, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:40:28.803564', 'step': 22354, 'epoch': 3} {'type': 'loss', 'content': 0.07932659983634949, 'timestamp': '2025-10-01 04:40:28.805624', 'step': 22355, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:40:28.837582', 'step': 22355, 'epoch': 3} {'type': 'loss', 'content': 0.04879383370280266, 'timestamp': '2025-10-01 04:40:28.861311', 'step': 22356, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:40:28.893259', 'step': 22356, 'epoch': 3} {'type': 'loss', 'content': 0.07625355571508408, 'timestamp': '2025-10-01 04:40:28.895401', 'step': 22357, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:40:28.926276', 'step': 22357, 'epoch': 3} {'type': 'loss', 'content': 0.009502073749899864, 'timestamp': '2025-10-01 04:40:28.928810', 'step': 22358, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:40:28.970704', 'step': 22358, 'epoch': 3} {'type': 'loss', 'content': 0.0468459390103817, 'timestamp': '2025-10-01 04:40:28.973054', 'step': 22359, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:40:29.005549', 'step': 22359, 'epoch': 3} {'type': 'loss', 'content': 0.05887731909751892, 'timestamp': '2025-10-01 04:40:29.029198', 'step': 22360, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:40:29.058809', 'step': 22360, 'epoch': 3} {'type': 'loss', 'content': 0.051798757165670395, 'timestamp': '2025-10-01 04:40:29.061055', 'step': 22361, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:40:29.091106', 'step': 22361, 'epoch': 3} {'type': 'loss', 'content': 0.057062819600105286, 'timestamp': '2025-10-01 04:40:29.093094', 'step': 22362, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-10-01 04:40:29.130793', 'step': 22362, 'epoch': 3} {'type': 'loss', 'content': 0.07878977805376053, 'timestamp': '2025-10-01 04:40:29.135524', 'step': 22363, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:40:29.166478', 'step': 22363, 'epoch': 3} {'type': 'loss', 'content': 0.028042146936058998, 'timestamp': '2025-10-01 04:40:29.190698', 'step': 22364, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:40:29.224215', 'step': 22364, 'epoch': 3} {'type': 'loss', 'content': 0.03210332989692688, 'timestamp': '2025-10-01 04:40:29.226453', 'step': 22365, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:40:29.267504', 'step': 22365, 'epoch': 3} {'type': 'loss', 'content': 0.035581037402153015, 'timestamp': '2025-10-01 04:40:29.269790', 'step': 22366, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:40:29.300404', 'step': 22366, 'epoch': 3} {'type': 'loss', 'content': 0.11601762473583221, 'timestamp': '2025-10-01 04:40:29.302696', 'step': 22367, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:40:29.334287', 'step': 22367, 'epoch': 3} {'type': 'loss', 'content': 0.09680811315774918, 'timestamp': '2025-10-01 04:40:29.358013', 'step': 22368, 'epoch': 3} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 949202279808}], 'timestamp': '2025-10-01 04:40:37.964136', 'step': 22368, 'epoch': 3} {'type': 'pplx', 'content': 13923.065214312519, 'timestamp': '2025-10-01 04:40:37.980754', 'step': 22368, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-10-01 04:40:38.010536', 'step': 22368, 'epoch': 3} {'type': 'loss', 'content': 0.08077815920114517, 'timestamp': '2025-10-01 04:40:38.012913', 'step': 22369, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:40:38.044544', 'step': 22369, 'epoch': 3} {'type': 'loss', 'content': 0.058050911873579025, 'timestamp': '2025-10-01 04:40:38.046804', 'step': 22370, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:40:38.078183', 'step': 22370, 'epoch': 3} {'type': 'loss', 'content': 0.04173169657588005, 'timestamp': '2025-10-01 04:40:38.080265', 'step': 22371, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:40:38.111675', 'step': 22371, 'epoch': 3} {'type': 'loss', 'content': 0.02455127239227295, 'timestamp': '2025-10-01 04:40:38.135671', 'step': 22372, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:40:38.166166', 'step': 22372, 'epoch': 3} {'type': 'loss', 'content': 0.04113444313406944, 'timestamp': '2025-10-01 04:40:38.169030', 'step': 22373, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:40:38.199394', 'step': 22373, 'epoch': 3} {'type': 'loss', 'content': 0.025584649294614792, 'timestamp': '2025-10-01 04:40:38.201653', 'step': 22374, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-10-01 04:40:38.234115', 'step': 22374, 'epoch': 3} {'type': 'loss', 'content': 0.1303388625383377, 'timestamp': '2025-10-01 04:40:38.236407', 'step': 22375, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-10-01 04:40:38.267484', 'step': 22375, 'epoch': 3} {'type': 'loss', 'content': 0.04348066449165344, 'timestamp': '2025-10-01 04:40:38.291265', 'step': 22376, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:40:38.321975', 'step': 22376, 'epoch': 3} {'type': 'loss', 'content': 0.05066145211458206, 'timestamp': '2025-10-01 04:40:38.324203', 'step': 22377, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-10-01 04:40:38.354916', 'step': 22377, 'epoch': 3} {'type': 'loss', 'content': 0.028979847207665443, 'timestamp': '2025-10-01 04:40:38.356912', 'step': 22378, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-10-01 04:40:38.386901', 'step': 22378, 'epoch': 3} {'type': 'loss', 'content': 0.018909849226474762, 'timestamp': '2025-10-01 04:40:38.388858', 'step': 22379, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [1, 208], 'flops': 1542724875376}, 'timestamp': '2025-10-01 04:40:38.419013', 'step': 22379, 'epoch': 3} {'type': 'loss', 'content': 0.0007445770897902548, 'timestamp': '2025-10-01 04:40:38.442608', 'step': 22380, 'epoch': 3} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 949202279808}], 'timestamp': '2025-10-01 04:40:47.067939', 'step': 22380, 'epoch': 3} {'type': 'pplx', 'content': 14067.037843330761, 'timestamp': '2025-10-01 04:40:47.070627', 'step': 22380, 'epoch': 3} {'type': 'best_pplx', 'content': 7734.8395379306885, 'timestamp': '2025-10-01 04:40:47.072358', 'step': 22380, 'epoch': 3} {'type': 'best_step', 'content': 932, 'timestamp': '2025-10-01 04:40:47.074103', 'step': 22380, 'epoch': 3} {'type': 'total_pplx_flops', 'content': 50323539177094400, 'timestamp': '2025-10-01 04:40:47.075384', 'step': 22380, 'epoch': 3} {'type': 'total_train_flops', 'content': 76563139214162640, 'timestamp': '2025-10-01 04:40:47.077597', 'step': 22380, 'epoch': 3}